def context_supervision_loss(self, distance, lw=1, ind_loss=None):
    """Distance is positive; want gt distance to be SMALLER than other distances.

    The loss used for context supervision is also a ranking loss: look at the
    rank loss between all possible pairs of moments; want the gt distance to be
    smaller. Take the average.
    """
    slices = L.Slice(distance, ntop=21, axis=1)
    gt = slices[0]
    setattr(self.n, 'gt_slice', gt)
    ranking_losses = []
    for i in range(1, 21):
        setattr(self.n, 'context_slice_%d' % i, slices[i])
        negate_distance = L.Power(slices[i], scale=-1)
        max_sum = L.Eltwise(gt, negate_distance, operation=1)
        max_sum_margin = L.Power(max_sum, shift=self.margin)
        max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
        if ind_loss:
            max_sum_margin_relu = L.Reshape(
                max_sum_margin_relu, shape=dict(dim=[self.batch_size, 1]))
            max_sum_margin_relu = L.Eltwise(max_sum_margin_relu, ind_loss,
                                            operation=0)
        setattr(self.n, 'max_sum_margin_relu_%d' % i, max_sum_margin_relu)
        ranking_loss = L.Reduction(max_sum_margin_relu, operation=4)
        ranking_losses.append(ranking_loss)
    sum_ranking_losses = L.Eltwise(*ranking_losses, operation=1)
    loss = L.Power(sum_ranking_losses, scale=1 / 21., loss_weight=[lw])
    return loss
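# Note on the integer layer parameters used throughout these graphs (Caffe
# protobuf enum values):
#   Eltwise   operation: 0 = PROD, 1 = SUM, 2 = MAX
#   Reduction operation: 1 = SUM, 2 = ASUM, 3 = SUMSQ, 4 = MEAN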
def test_power3(self):
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    # These two Power layers cannot be fused into one
    n.pow1 = L.Power(n.input1, power=2.0)
    n.pow2 = L.Power(n.pow1, scale=0.3)
    self._test_model(*self._netspec_to_model(n, 'power3'))
def scat_layer(bottom, dim, kernel_size, name, group=1):
    conv1 = conv_layer(bottom, dim, kernel_size, name + '_real', group=group)
    pow1 = L.Power(conv1, power=2, in_place=True)
    conv2 = conv_layer(bottom, dim, kernel_size, name + '_imag', group=group)
    pow2 = L.Power(conv2, power=2, in_place=True)
    res_add = add(pow1, pow2)
    res_add = L.Power(res_add, power=.5, in_place=True)
    return res_add
def test_power3():
    # type: () -> caffe.NetSpec
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    # These two Power layers cannot be fused into one
    n.pow1 = L.Power(n.input1, power=2.0)
    n.pow2 = L.Power(n.pow1, scale=0.3)
    return n
def normalize(self, bottom, axis=1, numtiles=4096):
    power = L.Power(bottom, power=2)
    power_sum = L.Reduction(power, axis=axis, operation=1)
    sqrt = L.Power(power_sum, power=-0.5, shift=0.00001)
    if axis == 1:
        reshape = L.Reshape(sqrt, shape=dict(dim=[-1, 1]))
    if axis == 2:
        reshape = L.Reshape(sqrt, shape=dict(dim=[self.batch_size, -1, 1]))
    tile = L.Tile(reshape, axis=axis, tiles=numtiles)
    return L.Eltwise(tile, bottom, operation=0)
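# For reference, a minimal NumPy sketch (hypothetical helper, not part of the
# original code) of what the normalize() graph above computes: square, sum
# along `axis`, add a small shift for stability, take the inverse square root,
# and multiply back onto the input (broadcasting stands in for the Tile layer).
import numpy as np

def normalize_ref(x, axis=1, eps=1e-5):
    inv_norm = (np.sum(x ** 2, axis=axis, keepdims=True) + eps) ** -0.5
    return x * inv_norm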
def tall_loss(self, positive, negative, query, lw=1):
    scores_p = self.distance_function(positive, query)
    scores_n = self.distance_function(negative, query)
    alpha_c = 1
    alpha_w = 1
    exp_p = L.Exp(scores_p, scale=-1)
    exp_n = L.Exp(scores_n)
    log_p = L.Log(exp_p, shift=1)
    log_n = L.Log(exp_n, shift=1)
    scale_p = L.Power(log_p, scale=alpha_c)
    scale_n = L.Power(log_n, scale=alpha_w)
    all_scores = L.Concat(scale_p, scale_n, axis=0)
    return L.Reduction(all_scores, operation=4, loss_weight=[lw])
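# A NumPy sketch (illustrative, not part of the original code) of the
# objective that tall_loss() assembles from Exp/Log/Power/Concat/Reduction
# layers: the mean over alpha_c * log(1 + exp(-s_p)) and
# alpha_w * log(1 + exp(s_n)) terms, i.e. a TALL-style alignment loss.
import numpy as np

def tall_loss_ref(s_p, s_n, alpha_c=1.0, alpha_w=1.0):
    terms = np.concatenate([alpha_c * np.log1p(np.exp(-s_p)),
                            alpha_w * np.log1p(np.exp(s_n))], axis=0)
    return terms.mean()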
def ranking_loss(self, p, n, t, lw=1):
    # For ranking used in paper
    distance_p = self.distance_function(p, t)
    distance_n = self.distance_function(n, t)
    negate_distance_n = L.Power(distance_n, scale=-1)
    max_sum = L.Eltwise(distance_p, negate_distance_n, operation=1)
    max_sum_margin = L.Power(max_sum, shift=self.margin)
    max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
    ranking_loss = L.Reduction(max_sum_margin_relu, operation=4,
                               loss_weight=[lw])
    return ranking_loss
def ranking_loss(self, p, n, t, lw=1):
    # I <3 Caffe - this is not obnoxious to write at all.
    distance_p = self.distance_function(p, t)
    distance_n = self.distance_function(n, t)
    negate_distance_n = L.Power(distance_n, scale=-1)
    max_sum = L.Eltwise(distance_p, negate_distance_n, operation=1)
    max_sum_margin = L.Power(max_sum, shift=self.margin)
    max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
    ranking_loss = L.Reduction(max_sum_margin_relu, operation=4,
                               loss_weight=[lw])
    return ranking_loss
def relational_ranking_loss(self, distance_p, distance_n, lw=1):
    """This function assumes you want to MINIMIZE distances"""
    negate_distance_n = L.Power(distance_n, scale=-1)
    max_sum = L.Eltwise(distance_p, negate_distance_n, operation=1)
    max_sum_margin = L.Power(max_sum, shift=self.margin)
    max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
    ranking_loss = L.Reduction(max_sum_margin_relu, operation=4,
                               loss_weight=[lw])
    return ranking_loss
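# A NumPy sketch (illustrative, not part of the original code) of the hinge
# that the ranking-loss graphs above assemble out of Power/Eltwise/ReLU/
# Reduction layers: mean(max(0, d_p - d_n + margin)).
import numpy as np

def margin_ranking_ref(d_p, d_n, margin):
    return np.maximum(0.0, d_p - d_n + margin).mean()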
def early_combine_mult_not_relational(self, vec1, vec2):
    mult = L.Eltwise(vec1, vec2, operation=0)
    setattr(self.n, 'mult', mult)
    norm_mult = self.normalize(mult, numtiles=self.visual_embedding_dim[-1],
                               axis=1)
    setattr(self.n, 'norm_mult', norm_mult)
    intermediate = L.InnerProduct(
        norm_mult,
        num_output=self.visual_embedding_dim[-1],
        weight_filler=self.uniform_weight_filler(-0.08, .08),
        param=self.learning_params([[1, 1], [2, 0]],
                                   ['eltwise_dist1', 'eltwise_dist1_b']),
        axis=1)
    nonlin_1 = L.ReLU(intermediate)
    setattr(self.n, 'intermediate', nonlin_1)
    dropout = L.Dropout(nonlin_1, dropout_ratio=self.dropout_visual)
    score = L.InnerProduct(
        dropout,
        num_output=1,
        weight_filler=self.uniform_weight_filler(-0.08, .08),
        param=self.learning_params([[1, 1], [2, 0]],
                                   ['eltwise_dist2', 'eltwise_dist2_b']),
        axis=1)
    negative_score = L.Power(score, scale=-1)  # computed but unused as written
    setattr(self.n, 'rank_score', score)
    return score
def test_power():
    # type: () -> caffe.NetSpec
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    n.pow1 = L.Power(n.input1, power=2.0, scale=0.5, shift=0.01)
    return n
def Power(bottom, name='power', p=1, a=1, b=0):
    # (ax+b)^p
    return L.Power(bottom, name=name, power_param={
        'power': p,
        'scale': a,
        'shift': b
    })
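# Usage sketch for the wrapper above (hypothetical; assumes caffe is
# importable and a make_shape helper as used in the tests elsewhere in this
# file): builds y = (0.5 * x + 1)^2.
def _power_wrapper_demo():
    n = caffe.NetSpec()
    n.x = L.Input(shape=make_shape([1, 3, 8, 8]))
    n.y = Power(n.x, name='scaled_square', p=2, a=0.5, b=1)
    return n.to_proto()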
def l2normed(self, vec, dim):
    """Returns L2-normalized instances of vec; i.e., for each instance x in
    vec, computes x / ((x ** 2).sum() ** 0.5). Assumes vec has shape N x dim."""
    denom = L.Reduction(vec, axis=1, operation=P.Reduction.SUMSQ)
    denom = L.Power(denom, power=(-0.5), shift=1e-12)
    denom = L.Reshape(denom, num_axes=0, axis=-1, shape=dict(dim=[1]))
    denom = L.Tile(denom, axis=1, tiles=dim)
    return L.Eltwise(vec, denom, operation=P.Eltwise.PROD)
def weight_edges2(bottom, num_output, power=1.0):
    bottom_avg = L.Convolution(bottom,
                               convolution_param=dict(
                                   num_output=num_output, kernel_size=1,
                                   stride=1, pad=0, bias_term=False,
                                   weight_filler=dict(type='constant',
                                                      value=1.0)),
                               param=[{'lr_mult': 0, 'decay_mult': 0}])
    binarized = L.Power(bottom_avg, power_param=dict(power=power))
    weight = L.Power(binarized, power_param=dict(shift=1, scale=-1))
    return weight
def normalize(bottom, dim):
    bottom_relu = L.ReLU(bottom)
    sum = L.Convolution(bottom_relu,
                        convolution_param=dict(
                            num_output=1, kernel_size=1, stride=1,
                            weight_filler=dict(type='constant', value=1),
                            bias_filler=dict(type='constant', value=0)),
                        param=[{'lr_mult': 0, 'decay_mult': 0},
                               {'lr_mult': 0, 'decay_mult': 0}])
    denom = L.Power(sum, power=(-1.0), shift=1e-12)
    denom = L.Tile(denom, axis=1, tiles=dim)
    return L.Eltwise(bottom_relu, denom, operation=P.Eltwise.PROD)
def pool_distances(self, vec, minimum_distance=True):
    # want to MINIMIZE distance; negate, maximize, then negate (again)
    # Assume that scores are an N x 21 blob
    if self.args.pool_type in ['max', 'average']:
        prep_pool = L.Reshape(vec,
                              shape=dict(dim=[self.batch_size, 1, 21, 1]))
        if minimum_distance:
            prep_pool = L.Power(prep_pool, scale=-1)
        max_pool = L.Pooling(prep_pool,
                             pool=pooling_type[self.args.pool_type],
                             kernel_h=21, kernel_w=1)
        pool = L.Reshape(max_pool, shape=dict(dim=[self.batch_size]))
        if minimum_distance:
            pool = L.Power(pool, scale=-1)
    elif self.args.pool_type in ['sum']:  # untested
        negative = L.Power(vec, scale=-1)
        pool = L.Reduction(negative, axis=1, operation=1)  # sum
    else:
        raise Exception("You did not select a valid pooling type.")
    return pool
def l2normed(dim):
    """Returns L2-normalized instances of vec; i.e., for each instance x in
    vec, computes x / ((x ** 2).sum() ** 0.5). Assumes vec has shape N x dim."""
    n = caffe.NetSpec()
    n.data, n.label = L.Python(module='layers', layer='tripletDataLayer',
                               ntop=2)
    n.denom = L.Reduction(n.data, axis=1, operation=P.Reduction.SUMSQ)
    # denom = L.Power(denom, power=(-0.5))
    n.power = L.Power(n.denom, power=(-0.5), shift=1e-12)  # For numerical stability
    n.reshape = L.Reshape(n.power, num_axes=0, axis=-1, shape=dict(dim=[1]))
    n.tile = L.Tile(n.reshape, axis=1, tiles=dim)
    n.elwise = L.Eltwise(n.data, n.tile, operation=P.Eltwise.PROD)
    return n.to_proto()
def early_combine_mult_tall(self, vec1, vec2):
    feature = self.tall_feature(vec1, vec2)
    setattr(self.n, 'feature', feature)
    intermediate = L.InnerProduct(
        feature,
        num_output=self.visual_embedding_dim[-1],
        weight_filler=self.uniform_weight_filler(-0.08, .08),
        param=self.learning_params([[1, 1], [2, 0]],
                                   ['eltwise_dist1', 'eltwise_dist1_b']),
        axis=1)
    nonlin_1 = L.ReLU(intermediate)
    setattr(self.n, 'intermediate', nonlin_1)
    dropout = L.Dropout(nonlin_1, dropout_ratio=self.dropout_visual)
    score = L.InnerProduct(
        dropout,
        num_output=1,
        weight_filler=self.uniform_weight_filler(-0.08, .08),
        param=self.learning_params([[1, 1], [2, 0]],
                                   ['eltwise_dist2', 'eltwise_dist2_b']),
        axis=1)
    negative_score = L.Power(score, scale=-1)  # computed but unused as written
    setattr(self.n, 'rank_score', score)
    return score
def gru_unit(self, prefix, x, cont, static=None, h=None, batch_size=100,
             timestep=0, gru_hidden=1000, weight_lr_mult=1, bias_lr_mult=2,
             weight_decay_mult=1, bias_decay_mult=0, concat_hidden=True,
             weight_filler=None, bias_filler=None):
    # assume static input already transformed
    if not weight_filler:
        weight_filler = self.uniform_weight_filler(-0.08, 0.08)
    if not bias_filler:
        bias_filler = self.constant_filler(0)
    if not h:
        h = self.dummy_data_layer([1, batch_size, gru_hidden], 1)

    def get_name(name):
        return '%s_%s' % (prefix, name)

    def get_param(weight_name, bias_name=None):
        # TODO: write this in terms of earlier method "init_params"
        w = dict(lr_mult=weight_lr_mult, decay_mult=weight_decay_mult,
                 name=get_name(weight_name))
        if bias_name is not None:
            b = dict(lr_mult=bias_lr_mult, decay_mult=bias_decay_mult,
                     name=get_name(bias_name))
            return [w, b]
        return [w]

    gate_dim = gru_hidden * 3

    # transform x_t
    x = L.InnerProduct(x, num_output=gate_dim, axis=2,
                       weight_filler=weight_filler, bias_filler=bias_filler,
                       param=get_param('W_xc', 'b_c'))
    self.rename_tops(x, get_name('%d_x_transform' % timestep))

    # transform h
    h_conted = L.Scale(h, cont, axis=0)
    h = L.InnerProduct(h_conted, num_output=gru_hidden * 2, axis=2,
                       bias_term=False, weight_filler=weight_filler,
                       param=get_param('W_hc'))
    h_name = get_name('%d_h_transform' % timestep)
    if not hasattr(self.n, h_name):
        setattr(self.n, h_name, h)

    # gru stuff TODO: write GRUUnit in caffe? would make all this much prettier.
    x_transform_z_r, x_transform_hc = L.Slice(x, slice_point=gru_hidden * 2,
                                              axis=2, ntop=2)
    sum_items = [x_transform_z_r, h]
    if static:
        sum_items += static
    z_r_sum = self.sum(sum_items)
    z_r = L.Sigmoid(z_r_sum)
    z, r = L.Slice(z_r, slice_point=gru_hidden, axis=2, ntop=2)

    z_weighted_h = self.prod([r, h_conted])
    z_h_transform = L.InnerProduct(z_weighted_h, num_output=gru_hidden,
                                   axis=2, bias_term=False,
                                   weight_filler=weight_filler,
                                   param=get_param('W_hzc'))

    sum_items = [x_transform_hc, z_h_transform]
    if static:
        sum_items += static
    hc_sum = self.sum(sum_items)
    hc = L.TanH(hc_sum)

    zm1 = L.Power(z, scale=-1, shift=1)  # computes (1 - z)
    h_h = self.prod([zm1, h_conted])
    h_hc = self.prod([z, hc])
    h = self.sum([h_h, h_hc])
    return h
def subtract(self, bottoms):
    assert len(bottoms) == 2
    negate = L.Power(bottoms[1], scale=-1)
    # Eltwise SUM with the negated second bottom computes
    # bottoms[0] - bottoms[1]
    return L.Eltwise(bottoms[0], negate, operation=1)
def convert_symbol2proto(symbol):
    def looks_like_weight(name):
        """Internal helper to figure out if node should be hidden with `hide_weights`."""
        if name.endswith("_weight"):
            return True
        if name.endswith("_bias"):
            return True
        if name.endswith("_beta") or name.endswith("_gamma") \
                or name.endswith("_moving_var") or name.endswith("_moving_mean"):
            return True
        return False

    json_symbol = json.loads(symbol.tojson())
    all_nodes = json_symbol['nodes']
    no_weight_nodes = []
    for node in all_nodes:
        op = node['op']
        name = node['name']
        if op == 'null':
            if looks_like_weight(name):
                continue
        no_weight_nodes.append(node)

    # build next node dict
    next_node = dict()
    for node in no_weight_nodes:
        node_name = node['name']
        for input in node['inputs']:
            last_node_name = all_nodes[input[0]]['name']
            if last_node_name in next_node:
                next_node[last_node_name].append(node_name)
            else:
                next_node[last_node_name] = [node_name]

    supported_op_type = ['null', 'BatchNorm', 'Convolution', 'Activation',
                         'Pooling', 'elemwise_add', 'SliceChannel',
                         'FullyConnected', 'SoftmaxOutput', '_maximum',
                         'add_n', 'Concat', '_mul_scalar', 'Deconvolution',
                         'UpSampling']
    top_dict = dict()
    caffe_net = caffe.NetSpec()
    for node in no_weight_nodes:
        if node['op'] == 'null':
            input_param = dict()
            if node['name'] == 'data':
                input_param['shape'] = dict(dim=[1, 3, 160, 160])
            else:
                input_param['shape'] = dict(dim=[1])
            top_data = CL.Input(ntop=1, input_param=input_param)
            top_dict[node['name']] = [top_data]
            setattr(caffe_net, node['name'], top_data)
        elif node['op'].endswith('_copy'):
            pass
        elif node['op'] == 'BatchNorm':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if 'momentum' in attr:
                momentum = float(attr['momentum'])
            else:
                momentum = 0.9
            if 'eps' in attr:
                eps = float(attr['eps'])
            else:
                eps = 0.001
            if NO_INPLACE:
                in_place = False
            bn_top = CL.BatchNorm(top_dict[bottom_node_name][input[1]],
                                  ntop=1,
                                  batch_norm_param=dict(
                                      use_global_stats=True,
                                      moving_average_fraction=momentum,
                                      eps=eps),
                                  in_place=in_place)
            setattr(caffe_net, node['name'], bn_top)
            scale_top = CL.Scale(bn_top, ntop=1,
                                 scale_param=dict(bias_term=True),
                                 in_place=not NO_INPLACE)
            top_dict[node['name']] = [scale_top]
            setattr(caffe_net, node['name'] + '_scale', scale_top)
        elif node['op'] == 'Convolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Convolution(top_dict[bottom_node_name][input[1]],
                                      ntop=1,
                                      convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Deconvolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            else:
                convolution_param['bias_term'] = False
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]],
                                        ntop=1,
                                        convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'UpSampling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'scale' in attr:
                kernel_size = 2 * eval(attr['scale']) - eval(attr['scale']) % 2
                convolution_param['kernel_size'] = kernel_size
            else:
                convolution_param['kernel_size'] = 1
            convolution_param['bias_term'] = False
            convolution_param['num_output'] = int(attr['num_filter'])
            convolution_param['group'] = int(attr['num_filter'])
            convolution_param['pad'] = int(
                math.ceil((eval(attr['scale']) - 1) / 2.))
            convolution_param['stride'] = eval(attr['scale'])
            conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]],
                                        ntop=1,
                                        convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Activation':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False
            if attr['act_type'] == 'relu':
                ac_top = CL.ReLU(top_dict[bottom_node_name][input[1]], ntop=1,
                                 in_place=in_place)
            elif attr['act_type'] == 'sigmoid':
                ac_top = CL.Sigmoid(top_dict[bottom_node_name][input[1]],
                                    ntop=1, in_place=in_place)
            elif attr['act_type'] == 'tanh':
                ac_top = CL.TanH(top_dict[bottom_node_name][input[1]], ntop=1,
                                 in_place=in_place)
            top_dict[node['name']] = [ac_top]
            setattr(caffe_net, node['name'], ac_top)
        elif node['op'] == 'Pooling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            pooling_param = dict()
            if attr['pool_type'] == 'avg':
                pooling_param['pool'] = 1
            elif attr['pool_type'] == 'max':
                pooling_param['pool'] = 0
            else:
                assert False, attr['pool_type']
            if 'global_pool' in attr and eval(attr['global_pool']) is True:
                pooling_param['global_pooling'] = True
            else:
                if 'kernel' in attr:
                    kernel_size = eval(attr['kernel'])
                    assert kernel_size[0] == kernel_size[1]
                    pooling_param['kernel_size'] = kernel_size[0]
                if 'pad' in attr:
                    pad_size = eval(attr['pad'])
                    assert pad_size[0] == pad_size[1]
                    pooling_param['pad'] = pad_size[0]
                if 'stride' in attr:
                    stride_size = eval(attr['stride'])
                    assert stride_size[0] == stride_size[1]
                    pooling_param['stride'] = stride_size[0]
            pool_top = CL.Pooling(top_dict[bottom_node_name][input[1]],
                                  ntop=1, pooling_param=pooling_param)
            top_dict[node['name']] = [pool_top]
            setattr(caffe_net, node['name'], pool_top)
        elif node['op'] == 'elemwise_add' or node['op'] == 'add_n':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 1
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]],
                                     top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_maximum':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 2
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]],
                                     top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1, eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_mul_scalar':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False
            ac_top = CL.Power(top_dict[bottom_node_name][input[1]], power=1.0,
                              scale=float(attr['scalar']), shift=0,
                              in_place=in_place)
            top_dict[node['name']] = [ac_top]
            setattr(caffe_net, node['name'], ac_top)
        elif node['op'] == 'SliceChannel':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            slice_param = dict()
            slice_param['slice_dim'] = 1
            slice_num = 2
            slice_outputs = CL.Slice(top_dict[bottom_node_name][input[1]],
                                     ntop=slice_num, slice_param=slice_param)
            top_dict[node['name']] = slice_outputs
            for idx, output in enumerate(slice_outputs):
                setattr(caffe_net, node['name'] + '_' + str(idx), output)
        elif node['op'] == 'FullyConnected':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            inner_product_param = dict()
            inner_product_param['num_output'] = int(attr['num_hidden'])
            fc_top = CL.InnerProduct(top_dict[bottom_node_name][input[1]],
                                     ntop=1,
                                     inner_product_param=inner_product_param)
            top_dict[node['name']] = [fc_top]
            setattr(caffe_net, node['name'], fc_top)
        elif node['op'] == 'SoftmaxOutput':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            softmax_loss = CL.SoftmaxWithLoss(
                top_dict[bottom_node_name_a][input_a[1]],
                top_dict[bottom_node_name_b][input_b[1]], ntop=1)
            top_dict[node['name']] = [softmax_loss]
            setattr(caffe_net, node['name'], softmax_loss)
        elif node['op'] == 'Concat':
            if len(node['inputs']) == 2:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                concat_top = CL.Concat(
                    top_dict[bottom_node_name_a][input_a[1]],
                    top_dict[bottom_node_name_b][input_b[1]], ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
            elif len(node['inputs']) == 3:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                input_c = node['inputs'][2]
                while True:
                    if all_nodes[input_c[0]]['op'] not in supported_op_type:
                        input_c = all_nodes[input_c[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                bottom_node_name_c = all_nodes[input_c[0]]['name']
                concat_top = CL.Concat(
                    top_dict[bottom_node_name_a][input_a[1]],
                    top_dict[bottom_node_name_b][input_b[1]],
                    top_dict[bottom_node_name_c][input_c[1]], ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
        else:
            logging.warn('unknown op type = %s' % node['op'])
    return caffe_net.to_proto()
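# Hypothetical driver for convert_symbol2proto() above (assumes mxnet is
# installed and the module-level NO_INPLACE flag is defined by the
# surrounding code):
def convert_from_json(json_path):
    import mxnet as mx
    sym = mx.sym.load(json_path)  # load a serialized MXNet symbol
    return convert_symbol2proto(sym)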
def pva_convHeader(net, from_layer, out_layer, use_pool=True, lr=1, decay=1):
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)
        ],
        'batch_norm_param': dict(use_global_stats=True),
    }
    scale_kwargs = {
        'bias_term': True,
        'param': [dict(lr_mult=lr, decay_mult=0),
                  dict(lr_mult=lr, decay_mult=0)],
    }
    power_kwargs = {'power': 1, 'scale': -1.0, 'shift': 0}
    conv_kwargs = {
        'param': [dict(lr_mult=lr, decay_mult=decay)],
        'weight_filler': dict(type='xavier'),
        'bias_term': False,
    }
    layer_name = "{}/conv".format(out_layer)
    name = "{}/conv".format(out_layer)
    net[name] = L.Convolution(net[from_layer], name=layer_name, num_output=16,
                              kernel_size=7, pad=3, stride=2, **conv_kwargs)
    start_layer = name
    layer_name = "{}/bn".format(out_layer)
    name = "{}/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer], name=layer_name, in_place=True,
                            **bn_kwargs)
    feaLayers = []
    feaLayers.append(net[name])
    start_layer = name
    neg_layer = "{}/neg".format(out_layer)
    neg_name = "{}/neg".format(out_layer)
    net[neg_name] = L.Power(net[start_layer], name=neg_layer, **power_kwargs)
    feaLayers.append(net[neg_name])
    concat_layer = "{}/concat".format(out_layer)
    concat_name = out_layer
    net[concat_name] = L.Concat(*feaLayers, name=concat_layer, axis=1)
    start_layer = concat_name
    layer_name = "{}/scale".format(out_layer)
    name = "{}/scale".format(out_layer)
    net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/relu".format(out_layer)
    name = "{}/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    # pool
    if use_pool:
        layer_name = "pool1"
        name = "pool1"
        net[name] = L.Pooling(net[start_layer], pool=P.Pooling.MAX,
                              kernel_size=3, stride=2)
    return net
def ResInceptionLayer(net, from_layer, out_layer, cross_stage=False,
                      channels_1=64, channels_3=[48, 128],
                      channels_5=[24, 48, 128], channels_pool=128,
                      channels_output=256, lr=1, decay=1, out_bn=False):
    assert len(channels_3) == 2
    assert len(channels_5) == 3
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)
        ],
        'batch_norm_param': dict(use_global_stats=True),
    }
    scale_kwargs = {
        'bias_term': True,
        'param': [dict(lr_mult=lr, decay_mult=0),
                  dict(lr_mult=lr, decay_mult=0)],
    }
    input_kwargs = {'power': 1, 'scale': 1, 'shift': 0}
    conv_kwargs = {
        'param': [dict(lr_mult=lr, decay_mult=decay)],
        'weight_filler': dict(type='xavier'),
        'bias_term': False,
    }
    convbias_kwargs = {
        'param': [
            dict(lr_mult=lr, decay_mult=decay),
            dict(lr_mult=2 * lr, decay_mult=0)
        ],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)
    }
    eltwise_kwargs = {'operation': 1, 'coeff': [1, 1]}
    start_layer = from_layer
    if cross_stage:
        stride = 2
    else:
        stride = 1
    # pre-stage: bn/scale/relu
    layer_name = "{}/incep/bn".format(out_layer)
    name = "{}/incep/pre".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer], name=layer_name, in_place=False,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/bn_scale".format(out_layer)
    name = "{}/incep/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/relu".format(out_layer)
    name = "{}/incep/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    fea_layer = name
    mlayers = []
    # conv-1x1
    layer_name = "{}/incep/0/conv".format(out_layer)
    name = "{}/incep/0".format(out_layer)
    net[name] = L.Convolution(net[fea_layer], name=layer_name,
                              num_output=channels_1, kernel_size=1, pad=0,
                              stride=stride, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/0/bn".format(out_layer)
    name = "{}/incep/0/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer], name=layer_name, in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/0/bn_scale".format(out_layer)
    name = "{}/incep/0/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/0/relu".format(out_layer)
    name = "{}/incep/0/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    mlayers.append(net[name])
    # conv-3x3
    layer_name = "{}/incep/1_reduce/conv".format(out_layer)
    name = "{}/incep/1_reduce".format(out_layer)
    net[name] = L.Convolution(net[fea_layer], name=layer_name,
                              num_output=channels_3[0], kernel_size=1, pad=0,
                              stride=stride, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_reduce/bn".format(out_layer)
    name = "{}/incep/1_reduce/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer], name=layer_name, in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_reduce/bn_scale".format(out_layer)
    name = "{}/incep/1_reduce/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_reduce/relu".format(out_layer)
    name = "{}/incep/1_reduce/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    layer_name = "{}/incep/1_0/conv".format(out_layer)
    name = "{}/incep/1_0".format(out_layer)
    net[name] = L.Convolution(net[start_layer], name=layer_name,
                              num_output=channels_3[1], kernel_size=3, pad=1,
                              stride=1, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_0/bn".format(out_layer)
    name = "{}/incep/1_0/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer], name=layer_name, in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_0/bn_scale".format(out_layer)
    name = "{}/incep/1_0/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_0/relu".format(out_layer)
    name = "{}/incep/1_0/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    mlayers.append(net[name])
    # conv-5x5
    layer_name = "{}/incep/2_reduce/conv".format(out_layer)
    name = "{}/incep/2_reduce".format(out_layer)
    net[name] = L.Convolution(net[fea_layer], name=layer_name,
                              num_output=channels_5[0], kernel_size=1, pad=0,
                              stride=stride, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_reduce/bn".format(out_layer)
    name = "{}/incep/2_reduce/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer], name=layer_name, in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_reduce/bn_scale".format(out_layer)
    name = "{}/incep/2_reduce/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_reduce/relu".format(out_layer)
    name = "{}/incep/2_reduce/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    layer_name = "{}/incep/2_0/conv".format(out_layer)
    name = "{}/incep/2_0".format(out_layer)
    net[name] = L.Convolution(net[start_layer], name=layer_name,
                              num_output=channels_5[1], kernel_size=3, pad=1,
                              stride=1, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_0/bn".format(out_layer)
    name = "{}/incep/2_0/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer], name=layer_name, in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_0/bn_scale".format(out_layer)
    name = "{}/incep/2_0/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_0/relu".format(out_layer)
    name = "{}/incep/2_0/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    layer_name = "{}/incep/2_1/conv".format(out_layer)
    name = "{}/incep/2_1".format(out_layer)
    net[name] = L.Convolution(net[start_layer], name=layer_name,
                              num_output=channels_5[2], kernel_size=3, pad=1,
                              stride=1, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_1/bn".format(out_layer)
    name = "{}/incep/2_1/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer], name=layer_name, in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_1/bn_scale".format(out_layer)
    name = "{}/incep/2_1/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_1/relu".format(out_layer)
    name = "{}/incep/2_1/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    mlayers.append(net[name])
    # pool
    if cross_stage:
        layer_name = "{}/incep/pool".format(out_layer)
        name = "{}/incep/pool".format(out_layer)
        net[name] = L.Pooling(net[fea_layer], pool=P.Pooling.MAX,
                              kernel_size=3, stride=2)
        start_layer = name
        layer_name = "{}/incep/poolproj/conv".format(out_layer)
        name = "{}/incep/poolproj".format(out_layer)
        net[name] = L.Convolution(net[start_layer], name=layer_name,
                                  num_output=channels_pool, kernel_size=1,
                                  pad=0, stride=1, **conv_kwargs)
        start_layer = name
        layer_name = "{}/incep/poolproj/bn".format(out_layer)
        name = "{}/incep/poolproj/bn".format(out_layer)
        net[name] = L.BatchNorm(net[start_layer], name=layer_name,
                                in_place=True, **bn_kwargs)
        start_layer = name
        layer_name = "{}/incep/poolproj/bn_scale".format(out_layer)
        name = "{}/incep/poolproj/bn_scale".format(out_layer)
        net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                            **scale_kwargs)
        start_layer = name
        layer_name = "{}/incep/poolproj/relu".format(out_layer)
        name = "{}/incep/poolproj/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
        mlayers.append(net[name])
    # incep
    layer_name = "{}/incep".format(out_layer)
    name = "{}/incep".format(out_layer)
    net[name] = L.Concat(*mlayers, name=layer_name, axis=1)
    start_layer = name
    # out-conv
    scLayers = []
    if not out_bn:
        layer_name = "{}/out/conv".format(out_layer)
        name = "{}/out".format(out_layer)
        net[name] = L.Convolution(net[start_layer], name=layer_name,
                                  num_output=channels_output, kernel_size=1,
                                  pad=0, stride=1, **convbias_kwargs)
        scLayers.append(net[name])
    else:
        layer_name = "{}/out/conv".format(out_layer)
        name = "{}/out".format(out_layer)
        net[name] = L.Convolution(net[start_layer], name=layer_name,
                                  num_output=channels_output, kernel_size=1,
                                  pad=0, stride=1, **conv_kwargs)
        start_layer = name
        layer_name = "{}/out/bn".format(out_layer)
        name = "{}/out/bn".format(out_layer)
        net[name] = L.BatchNorm(net[start_layer], name=layer_name,
                                in_place=True, **bn_kwargs)
        start_layer = name
        layer_name = "{}/out/bn_scale".format(out_layer)
        name = "{}/out/bn_scale".format(out_layer)
        net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                            **scale_kwargs)
        scLayers.append(net[name])
    # proj or input
    if cross_stage:
        layer_name = "{}/proj".format(out_layer)
        name = "{}/proj".format(out_layer)
        net[name] = L.Convolution(net[from_layer], name=layer_name,
                                  num_output=channels_output, kernel_size=1,
                                  pad=0, stride=2, **convbias_kwargs)
        scLayers.append(net[name])
    else:
        layer_name = "{}/input".format(out_layer)
        name = "{}/input".format(out_layer)
        net[name] = L.Power(net[from_layer], name=layer_name, **input_kwargs)
        scLayers.append(net[name])
    # Eltwise
    layer_name = out_layer
    name = out_layer
    net[name] = L.Eltwise(*scLayers, name=layer_name, **eltwise_kwargs)
    return net
def mCReLULayer(net, from_layer, out_layer, reduced_channels=24,
                inter_channels=24, output_channels=48, lr=1, decay=1,
                use_prior_bn=True, cross_stage=False, has_pool=False):
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)
        ],
        'batch_norm_param': dict(use_global_stats=True),
    }
    scale_kwargs = {
        'bias_term': True,
        'param': [dict(lr_mult=lr, decay_mult=0),
                  dict(lr_mult=lr, decay_mult=0)],
    }
    power_kwargs = {'power': 1, 'scale': -1.0, 'shift': 0}
    input_kwargs = {'power': 1, 'scale': 1, 'shift': 0}
    conv_kwargs = {
        'param': [
            dict(lr_mult=lr, decay_mult=decay),
            dict(lr_mult=2 * lr, decay_mult=0)
        ],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)
    }
    eltwise_kwargs = {'operation': 1, 'coeff': [1, 1]}
    # conv/1: bn/scale/relu/conv
    start_layer = from_layer
    if use_prior_bn:
        layer_name = "{}/1/bn".format(out_layer)
        name = "{}/1/pre".format(out_layer)
        net[name] = L.BatchNorm(net[start_layer], name=layer_name,
                                in_place=False, **bn_kwargs)
        start_layer = name
        layer_name = "{}/1/bn_scale".format(out_layer)
        name = "{}/1/bn_scale".format(out_layer)
        net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                            **scale_kwargs)
        start_layer = name
        layer_name = "{}/1/relu".format(out_layer)
        name = "{}/1/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
        start_layer = name
    layer_name = "{}/1/conv".format(out_layer)
    name = "{}/1".format(out_layer)
    if has_pool:
        stride = 2
    else:
        stride = 1
    net[name] = L.Convolution(net[start_layer], name=layer_name,
                              num_output=reduced_channels, kernel_size=1,
                              pad=0, stride=stride, **conv_kwargs)
    start_layer = name
    # conv/2: bn/scale/relu/conv
    layer_name = "{}/2/bn".format(out_layer)
    name = "{}/2/pre".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer], name=layer_name, in_place=False,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/2/bn_scale".format(out_layer)
    name = "{}/2/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer], name=layer_name, in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/2/relu".format(out_layer)
    name = "{}/2/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    layer_name = "{}/2/conv".format(out_layer)
    name = "{}/2".format(out_layer)
    net[name] = L.Convolution(net[start_layer], name=layer_name,
                              num_output=inter_channels, kernel_size=3, pad=1,
                              stride=1, **conv_kwargs)
    start_layer = name
    # conv/3: bn/neg/concat/scale/relu/conv
    feaLayers = []
    bn_layer = "{}/3/bn".format(out_layer)
    bn_name = "{}/3/pre".format(out_layer)
    net[bn_name] = L.BatchNorm(net[start_layer], name=bn_layer,
                               in_place=False, **bn_kwargs)
    feaLayers.append(net[bn_name])
    start_layer = bn_name
    neg_layer = "{}/3/neg".format(out_layer)
    neg_name = "{}/3/neg".format(out_layer)
    net[neg_name] = L.Power(net[start_layer], name=neg_layer, **power_kwargs)
    feaLayers.append(net[neg_name])
    concat_layer = "{}/3/concat".format(out_layer)
    concat_name = "{}/3/preAct".format(out_layer)
    net[concat_name] = L.Concat(*feaLayers, name=concat_layer, axis=1)
    layer_name = "{}/3/scale".format(out_layer)
    name = "{}/3/scale".format(out_layer)
    net[name] = L.Scale(net[concat_name], name=layer_name, in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/3/relu".format(out_layer)
    name = "{}/3/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    layer_name = "{}/3/conv".format(out_layer)
    name = "{}/3".format(out_layer)
    net[name] = L.Convolution(net[start_layer], name=layer_name,
                              num_output=output_channels, kernel_size=1,
                              pad=0, stride=1, **conv_kwargs)
    start_layer = name
    mlayers = []
    mlayers.append(net[name])
    # proj or input
    if cross_stage:
        layer_name = "{}/proj".format(out_layer)
        name = "{}/proj".format(out_layer)
        if has_pool:
            start_layer = "{}/1/pre".format(out_layer)
            stride = 2
        else:
            start_layer = from_layer
            stride = 1
        net[name] = L.Convolution(net[start_layer], name=layer_name,
                                  num_output=output_channels, kernel_size=1,
                                  pad=0, stride=stride, **conv_kwargs)
        mlayers.append(net[name])
    else:
        layer_name = "{}/input".format(out_layer)
        name = "{}/input".format(out_layer)
        start_layer = from_layer
        net[name] = L.Power(net[start_layer], name=layer_name, **input_kwargs)
        mlayers.append(net[name])
    # eltwise
    layer_name = out_layer
    name = out_layer
    net[name] = L.Eltwise(*mlayers, name=layer_name, **eltwise_kwargs)
    return net
def smCReLULayer_NBN(net, from_layer, out_layer, channels=32,
                     use_reduced_layer=False, reduced_layers=[],
                     lr=1, decay=1):
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)
        ],
        'batch_norm_param': dict(use_global_stats=True),
    }
    scale_kwargs = {
        'bias_term': True,
        'param': [dict(lr_mult=lr, decay_mult=0),
                  dict(lr_mult=lr, decay_mult=0)],
    }
    power_kwargs = {'power': 1, 'scale': -1.0, 'shift': 0}
    conv_kwargs = {
        'param': [
            dict(lr_mult=lr, decay_mult=decay),
            dict(lr_mult=2 * lr, decay_mult=0)
        ],
        'weight_filler': dict(type='xavier'),
        'bias_filler': dict(type='constant', value=0)
    }
    conv_nb_kwargs = {
        'param': [dict(lr_mult=lr, decay_mult=decay)],
        'weight_filler': dict(type='xavier'),
        'bias_term': False,
    }
    start_layer = from_layer
    # 1x1 convLayer
    if use_reduced_layer:
        name = "{}/reduced/conv".format(out_layer)
        net[name] = L.Convolution(net[start_layer],
                                  num_output=reduced_layers[0], kernel_size=1,
                                  pad=0, stride=1, **conv_kwargs)
        start_layer = name
        name = "{}/reduced/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], in_place=True)
        start_layer = name
    # 3x3 convLayer
    if use_reduced_layer:
        name = "{}/inter/conv".format(out_layer)
        net[name] = L.Convolution(net[start_layer],
                                  num_output=reduced_layers[1], kernel_size=3,
                                  pad=1, stride=1, **conv_nb_kwargs)
        start_layer = name
        name = "{}/inter/bn".format(out_layer)
        net[name] = L.BatchNorm(net[start_layer], in_place=False, **bn_kwargs)
        start_layer = name
        neg_name = "{}/inter/neg".format(out_layer)
        net[neg_name] = L.Power(net[start_layer], **power_kwargs)
        name = "{}/inter/concat".format(out_layer)
        net[name] = L.Concat(net[start_layer], net[neg_name], axis=1)
        start_layer = name
        name = "{}/inter/scale".format(out_layer)
        net[name] = L.Scale(net[start_layer], in_place=True, **scale_kwargs)
        start_layer = name
        name = "{}/inter/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], in_place=True)
        start_layer = name
    else:
        name = "{}/conv".format(out_layer)
        net[name] = L.Convolution(net[start_layer], num_output=channels,
                                  kernel_size=3, pad=1, stride=1,
                                  **conv_nb_kwargs)
        start_layer = name
        name = "{}/bn".format(out_layer)
        net[name] = L.BatchNorm(net[start_layer], in_place=False, **bn_kwargs)
        start_layer = name
        neg_name = "{}/neg".format(out_layer)
        net[neg_name] = L.Power(net[start_layer], **power_kwargs)
        name = "{}/concat".format(out_layer)
        net[name] = L.Concat(net[start_layer], net[neg_name], axis=1)
        start_layer = name
        name = "{}/scale".format(out_layer)
        net[name] = L.Scale(net[start_layer], in_place=True, **scale_kwargs)
        start_layer = name
        name = "{}/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], in_place=True)
        start_layer = name
    # 1x1
    if use_reduced_layer:
        name = "{}/out/conv".format(out_layer)
        net[name] = L.Convolution(net[start_layer],
                                  num_output=reduced_layers[2], kernel_size=1,
                                  pad=0, stride=1, **conv_kwargs)
        start_layer = name
        name = "{}/out/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], in_place=True)
        start_layer = name
    return net
def ReIDExtLayers(net, from_layer="convf", label_layer="label",
                  net_input_width=432, net_input_height=324, train=True,
                  lr=1, decay=1):
    # roi_data_layer -> [ROI_POOLING + LABEL]
    # roi_pooling_layer -> (10,10) (0.0625 (1/16))
    # we use [conv4_3(reorg) + conv5_5] as convf
    # use stride_conv to get conv6_1
    # -> conv6_2 -> conv6_3 -> (stride_conv) conv7_1 -> conv7_2 -> avg_pool
    # -> FC (256) -> Normalize
    # -> LabeledMatch / UnlabeledMatch (use label)
    # use scale / concat to get {L+Q} array
    # -> softmaxWithLoss & accuracy (train)
    assert from_layer in net.keys()
    # Roi_Data_Layer
    roi_data_kwargs = {
        'net_input_width': net_input_width,
        'net_input_height': net_input_height
    }
    net.roi_pool, net.roi_label = L.RoiData(net[label_layer], ntop=2,
                                            roi_data_param=roi_data_kwargs)
    # Roi_Pooling_Layer
    roi_pool_kwargs = {
        'pooled_h': 10,
        'pooled_w': 10,
        'spatial_scale': 0.0625,
    }
    net.rpf = L.ROIPooling(net[from_layer], net.roi_pool,
                           roi_pooling_param=roi_pool_kwargs)
    # ConvLayers
    # conv6
    ConvBNUnitLayer(net, "rpf", "reid_c61", use_bn=False, use_relu=True,
                    num_output=256, kernel_size=3, pad=1, stride=2)
    ConvBNUnitLayer(net, "reid_c61", "reid_c62", use_bn=False, use_relu=True,
                    num_output=256, kernel_size=3, pad=1, stride=1)
    # ConvBNUnitLayer(net, "reid_c62", "reid_c63", use_bn=False, use_relu=True,
    #                 num_output=256, kernel_size=3, pad=1, stride=1)
    # conv7
    ConvBNUnitLayer(net, "reid_c62", "reid_c71", use_bn=False, use_relu=True,
                    num_output=256, kernel_size=3, pad=1, stride=2)
    ConvBNUnitLayer(net, "reid_c71", "reid_c72", use_bn=False, use_relu=True,
                    num_output=256, kernel_size=3, pad=1, stride=1)
    # avg_pool
    net.avgpool = L.Pooling(net["reid_c72"], pool=P.Pooling.AVE,
                            global_pooling=True)
    # FC & Norm
    fc_kwargs = {
        'param': [
            dict(lr_mult=lr, decay_mult=decay),
            dict(lr_mult=2 * lr, decay_mult=0)
        ],
        'weight_filler': dict(type='gaussian', std=0.005),
        'bias_filler': dict(type='constant', value=0)
    }
    net.fp = L.InnerProduct(net.avgpool, num_output=256, **fc_kwargs)
    net.fpn = L.Normalize(net.fp)
    # Match
    labelMatch_kwargs = {
        'num_classes': 5532,
        'momentum': 0.5,
    }
    net.labeled_match, net.gt = L.LabeledMatch(
        net.fpn, net.roi_label, ntop=2, labeled_match_param=labelMatch_kwargs)
    unlabelMatch_kwargs = {
        'queue_size': 5000,
    }
    net.unlabeled_match = L.UnlabeledMatch(
        net.fpn, net.roi_label, unlabeled_match_param=unlabelMatch_kwargs)
    # scale
    power_kwargs = {'scale': 10}
    net.labeled_match_scale = L.Power(net.labeled_match, **power_kwargs)
    net.unlabeled_match_scale = L.Power(net.unlabeled_match, **power_kwargs)
    # concat: cosine similarity
    net.cosine = L.Concat(net.labeled_match_scale, net.unlabeled_match_scale,
                          axis=1)
    if train:
        # softmaxWithLoss
        loss_kwargs = {
            'ignore_label': -1,
            'normalize': True,
        }
        net.loss = L.SoftmaxWithLoss(net.cosine, net.gt,
                                     propagate_down=[True, False],
                                     loss_weight=[1], loss_param=loss_kwargs)
    else:
        # accuracy
        accu_kwargs = {
            'ignore_label': -1,
            'top_k': 1,
        }
        net.accuracy = L.AccuracyReid(net.cosine, net.gt,
                                      accuracy_param=accu_kwargs)
    return net
def create_ssn_net(img_height, img_width, num_spixels, pos_scale, color_scale,
                   num_spixels_h, num_spixels_w, num_steps, phase=None):
    n = caffe.NetSpec()
    if phase == 'TRAIN':
        n.img, n.spixel_init, n.feat_spixel_init, n.label, n.problabel = \
            L.Python(python_param=dict(module="input_patch_data_layer",
                                       layer="InputRead",
                                       param_str="TRAIN_1000000_" + str(num_spixels)),
                     include=dict(phase=0), ntop=5)
    elif phase == 'TEST':
        n.img, n.spixel_init, n.feat_spixel_init, n.label, n.problabel = \
            L.Python(python_param=dict(module="input_patch_data_layer",
                                       layer="InputRead",
                                       param_str="VAL_10_" + str(num_spixels)),
                     include=dict(phase=1), ntop=5)
    else:
        n.img = L.Input(shape=[dict(dim=[1, 3, img_height, img_width])])
        n.spixel_init = L.Input(
            shape=[dict(dim=[1, 1, img_height, img_width])])
        n.feat_spixel_init = L.Input(
            shape=[dict(dim=[1, 1, img_height, img_width])])

    n.pixel_features = L.PixelFeature(
        n.img,
        pixel_feature_param=dict(type=P.PixelFeature.POSITION_AND_RGB,
                                 pos_scale=float(pos_scale),
                                 color_scale=float(color_scale)))

    ### Transform Pixel features
    n.trans_features = cnn_module(n.pixel_features, trans_dim)

    # Initial Superpixels
    n.init_spixel_feat = L.SpixelFeature(
        n.trans_features, n.feat_spixel_init,
        spixel_feature_param=dict(type=P.SpixelFeature.AVGRGB, rgb_scale=1.0,
                                  ignore_idx_value=-10,
                                  ignore_feature_value=255,
                                  max_spixels=int(num_spixels)))

    ### Iteration-1
    n.spixel_feat1 = exec_iter(n.init_spixel_feat, n.trans_features,
                               n.spixel_init, num_spixels_h, num_spixels_w,
                               num_spixels, trans_dim)
    ### Iteration-2
    n.spixel_feat2 = exec_iter(n.spixel_feat1, n.trans_features,
                               n.spixel_init, num_spixels_h, num_spixels_w,
                               num_spixels, trans_dim)
    ### Iteration-3
    n.spixel_feat3 = exec_iter(n.spixel_feat2, n.trans_features,
                               n.spixel_init, num_spixels_h, num_spixels_w,
                               num_spixels, trans_dim)
    ### Iteration-4
    n.spixel_feat4 = exec_iter(n.spixel_feat3, n.trans_features,
                               n.spixel_init, num_spixels_h, num_spixels_w,
                               num_spixels, trans_dim)

    if num_steps == 5:
        ### Iteration-5
        n.final_pixel_assoc = \
            compute_assignments(n.spixel_feat4, n.trans_features,
                                n.spixel_init, num_spixels_h, num_spixels_w,
                                num_spixels, trans_dim)
    elif num_steps == 10:
        ### Iteration-5
        n.spixel_feat5 = exec_iter(n.spixel_feat4, n.trans_features,
                                   n.spixel_init, num_spixels_h,
                                   num_spixels_w, num_spixels, trans_dim)
        ### Iteration-6
        n.spixel_feat6 = exec_iter(n.spixel_feat5, n.trans_features,
                                   n.spixel_init, num_spixels_h,
                                   num_spixels_w, num_spixels, trans_dim)
        ### Iteration-7
        n.spixel_feat7 = exec_iter(n.spixel_feat6, n.trans_features,
                                   n.spixel_init, num_spixels_h,
                                   num_spixels_w, num_spixels, trans_dim)
        ### Iteration-8
        n.spixel_feat8 = exec_iter(n.spixel_feat7, n.trans_features,
                                   n.spixel_init, num_spixels_h,
                                   num_spixels_w, num_spixels, trans_dim)
        ### Iteration-9
        n.spixel_feat9 = exec_iter(n.spixel_feat8, n.trans_features,
                                   n.spixel_init, num_spixels_h,
                                   num_spixels_w, num_spixels, trans_dim)
        ### Iteration-10
        n.final_pixel_assoc = \
            compute_assignments(n.spixel_feat9, n.trans_features,
                                n.spixel_init, num_spixels_h, num_spixels_w,
                                num_spixels, trans_dim)

    if phase == 'TRAIN' or phase == 'TEST':
        # Compute final spixel features
        n.new_spixel_feat = L.SpixelFeature2(
            n.pixel_features, n.final_pixel_assoc, n.spixel_init,
            spixel_feature2_param=dict(num_spixels_h=num_spixels_h,
                                       num_spixels_w=num_spixels_w))
        n.new_spix_indices = compute_final_spixel_labels(
            n.final_pixel_assoc, n.spixel_init, num_spixels_h, num_spixels_w)
        n.recon_feat2 = L.Smear(n.new_spixel_feat, n.new_spix_indices,
                                propagate_down=[True, False])
        n.loss1, n.loss2 = position_color_loss(n.recon_feat2,
                                               n.pixel_features,
                                               pos_weight=0.00001,
                                               col_weight=0.0)
        # Convert pixel labels to spixel labels
        n.spixel_label = L.SpixelFeature2(
            n.problabel, n.final_pixel_assoc, n.spixel_init,
            spixel_feature2_param=dict(num_spixels_h=num_spixels_h,
                                       num_spixels_w=num_spixels_w))
        # Convert spixel labels back to pixel labels
        n.recon_label = decode_features(n.final_pixel_assoc, n.spixel_label,
                                        n.spixel_init, num_spixels_h,
                                        num_spixels_w, num_spixels,
                                        num_channels=50)
        n.recon_label = L.ReLU(n.recon_label, in_place=True)
        n.recon_label2 = L.Power(n.recon_label,
                                 power_param=dict(shift=1e-10))
        n.recon_label3 = normalize(n.recon_label2, 50)
        n.loss3 = L.LossWithoutSoftmax(n.recon_label3, n.label,
                                       loss_param=dict(ignore_label=255),
                                       loss_weight=1.0)
    else:
        n.new_spix_indices = compute_final_spixel_labels(
            n.final_pixel_assoc, n.spixel_init, num_spixels_h, num_spixels_w)
    return n.to_proto()
def generate_model(split, config):
    n = caffe.NetSpec()
    dataset = config.dataset
    batch_size = config.N
    mode_str = str(dict(dataset=dataset, split=split, batch_size=batch_size))
    n.image1, n.image2, n.label, n.sample_weights, n.feat_crop = L.Python(
        module=config.data_provider, layer=config.data_provider_layer,
        param_str=mode_str, ntop=5)

    ################################
    # the base net (VGG-16) branch 1
    n.conv1_1, n.relu1_1 = conv_relu(n.image1, 64,
                                     param_names=('conv1_1_w', 'conv1_1_b'),
                                     fix_param=True, finetune=False)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64,
                                     param_names=('conv1_2_w', 'conv1_2_b'),
                                     fix_param=True, finetune=False)
    n.pool1 = max_pool(n.relu1_2)
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128,
                                     param_names=('conv2_1_w', 'conv2_1_b'),
                                     fix_param=True, finetune=False)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128,
                                     param_names=('conv2_2_w', 'conv2_2_b'),
                                     fix_param=True, finetune=False)
    n.pool2 = max_pool(n.relu2_2)
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256,
                                     param_names=('conv3_1_w', 'conv3_1_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256,
                                     param_names=('conv3_2_w', 'conv3_2_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256,
                                     param_names=('conv3_3_w', 'conv3_3_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.pool3 = max_pool(n.relu3_3)
    # spatial L2 norm
    n.pool3_lrn = L.LRN(n.pool3, local_size=513, alpha=513, beta=0.5, k=1e-16)
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512,
                                     param_names=('conv4_1_w', 'conv4_1_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512,
                                     param_names=('conv4_2_w', 'conv4_2_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512,
                                     param_names=('conv4_3_w', 'conv4_3_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    # spatial L2 norm
    n.relu4_3_lrn = L.LRN(n.relu4_3, local_size=1025, alpha=1025, beta=0.5,
                          k=1e-16)
    #n.pool4 = max_pool(n.relu4_3)
    #n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512,
    #                                 param_names=('conv5_1_w', 'conv5_1_b'),
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=config.finetune)
    #n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512,
    #                                 param_names=('conv5_2_w', 'conv5_2_b'),
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=config.finetune)
    #n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512,
    #                                 param_names=('conv5_3_w', 'conv5_3_b'),
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=config.finetune)
    # upsampling feature map
    #n.relu5_3_upsampling = L.Deconvolution(n.relu5_3,
    #                                       convolution_param=dict(
    #                                           num_output=512, group=512,
    #                                           kernel_size=4, stride=2,
    #                                           pad=1, bias_term=False,
    #                                           weight_filler=dict(type='bilinear')),
    #                                       param=[dict(lr_mult=0, decay_mult=0)])
    # spatial L2 norm
    #n.relu5_3_lrn = L.LRN(n.relu5_3_upsampling, local_size=1025, alpha=1025, beta=0.5, k=1e-16)

    # concat all skip features
    #n.feat_all1 = n.relu4_3_lrn
    n.feat_all1 = L.Concat(n.pool3_lrn, n.relu4_3_lrn,
                           concat_param=dict(axis=1))
    #n.feat_all1 = L.Concat(n.pool3_lrn, n.relu4_3_lrn, n.relu5_3_lrn, concat_param=dict(axis=1))
    n.feat_all1_crop = L.Crop(n.feat_all1, n.feat_crop,
                              crop_param=dict(
                                  axis=2,
                                  offset=[config.query_featmap_H // 3,
                                          config.query_featmap_W // 3]))

    ################################
    # the base net (VGG-16) branch 2
    n.conv1_1_p, n.relu1_1_p = conv_relu(n.image2, 64,
                                         param_names=('conv1_1_w', 'conv1_1_b'),
                                         fix_param=True, finetune=False)
    n.conv1_2_p, n.relu1_2_p = conv_relu(n.relu1_1_p, 64,
                                         param_names=('conv1_2_w', 'conv1_2_b'),
                                         fix_param=True, finetune=False)
    n.pool1_p = max_pool(n.relu1_2_p)
    n.conv2_1_p, n.relu2_1_p = conv_relu(n.pool1_p, 128,
                                         param_names=('conv2_1_w', 'conv2_1_b'),
                                         fix_param=True, finetune=False)
    n.conv2_2_p, n.relu2_2_p = conv_relu(n.relu2_1_p, 128,
                                         param_names=('conv2_2_w', 'conv2_2_b'),
                                         fix_param=True, finetune=False)
    n.pool2_p = max_pool(n.relu2_2_p)
    n.conv3_1_p, n.relu3_1_p = conv_relu(n.pool2_p, 256,
                                         param_names=('conv3_1_w', 'conv3_1_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv3_2_p, n.relu3_2_p = conv_relu(n.relu3_1_p, 256,
                                         param_names=('conv3_2_w', 'conv3_2_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv3_3_p, n.relu3_3_p = conv_relu(n.relu3_2_p, 256,
                                         param_names=('conv3_3_w', 'conv3_3_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.pool3_p = max_pool(n.relu3_3_p)
    # spatial L2 norm
    n.pool3_lrn_p = L.LRN(n.pool3_p, local_size=513, alpha=513, beta=0.5,
                          k=1e-16)
    n.conv4_1_p, n.relu4_1_p = conv_relu(n.pool3_p, 512,
                                         param_names=('conv4_1_w', 'conv4_1_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv4_2_p, n.relu4_2_p = conv_relu(n.relu4_1_p, 512,
                                         param_names=('conv4_2_w', 'conv4_2_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv4_3_p, n.relu4_3_p = conv_relu(n.relu4_2_p, 512,
                                         param_names=('conv4_3_w', 'conv4_3_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    # spatial L2 norm
    n.relu4_3_lrn_p = L.LRN(n.relu4_3_p, local_size=1025, alpha=1025,
                            beta=0.5, k=1e-16)
    #n.pool4_p = max_pool(n.relu4_3_p)
    #n.conv5_1_p, n.relu5_1_p = conv_relu(n.pool4_p, 512,
    #                                     param_names=('conv5_1_w', 'conv5_1_b'),
    #                                     fix_param=config.fix_vgg,
    #                                     finetune=config.finetune)
    #n.conv5_2_p, n.relu5_2_p = conv_relu(n.relu5_1_p, 512,
    #                                     param_names=('conv5_2_w', 'conv5_2_b'),
    #                                     fix_param=config.fix_vgg,
    #                                     finetune=config.finetune)
    #n.conv5_3_p, n.relu5_3_p = conv_relu(n.relu5_2_p, 512,
    #                                     param_names=('conv5_3_w', 'conv5_3_b'),
    #                                     fix_param=config.fix_vgg,
    #                                     finetune=config.finetune)
    # upsampling feature map
    #n.relu5_3_upsampling_p = L.Deconvolution(n.relu5_3_p,
    #                                         convolution_param=dict(
    #                                             num_output=512, group=512,
    #                                             kernel_size=4, stride=2,
    #                                             pad=1, bias_term=False,
    #                                             weight_filler=dict(type='bilinear')),
    #                                         param=[dict(lr_mult=0, decay_mult=0)])
    # spatial L2 norm
    #n.relu5_3_lrn_p = L.LRN(n.relu5_3_upsampling_p, local_size=1025, alpha=1025, beta=0.5, k=1e-16)

    # concat all skip features
    #n.feat_all2 = n.relu4_3_lrn_p
    n.feat_all2 = L.Concat(n.pool3_lrn_p, n.relu4_3_lrn_p,
                           concat_param=dict(axis=1))
    #n.feat_all2 = L.Concat(n.pool3_lrn_p, n.relu4_3_lrn_p, n.relu5_3_lrn_p, concat_param=dict(axis=1))

    # Dyn conv layer
    n.fcn_scores = L.DynamicConvolution(n.feat_all2, n.feat_all1_crop,
                                        convolution_param=dict(
                                            num_output=1, kernel_size=11,
                                            stride=1, pad=5, bias_term=False))
    # scale scores with zero mean 0.01196 -> 0.02677
    n.fcn_scaled_scores = L.Power(n.fcn_scores,
                                  power_param=dict(scale=0.01196, shift=-1.0,
                                                   power=1))
    # Loss Layer
    n.loss = L.WeightedSigmoidCrossEntropyLoss(n.fcn_scaled_scores, n.label,
                                               n.sample_weights)
    return n.to_proto()
def lrcn_reinforce(self, save_name, RL_loss='lstm_classification', lw=20):
    data_inputs = self.data_inputs
    param_str = self.param_str
    ss_tag = 'reg_'  # reg sentences will be the first part of the batch
    if self.separate_sents:
        if not 'batch_size' in param_str.keys():
            param_str['batch_size'] = 100
        self.slice_point = param_str['batch_size'] / 2
        self.batch_size = param_str['batch_size']
    param_str_loss = {}
    param_str_loss['vocab'] = param_str['vocabulary']
    param_str_loss['avoid_words'] = ['red', 'small']
    if self.baseline:
        param_str_loss['baseline'] = True
    data_input = 'fc8'

    data_tops = self.python_input_layer(data_inputs['module'],
                                        data_inputs['layer'], param_str)
    self.rename_tops(data_tops, data_inputs['param_str']['top_names'])
    feature_name = 'fc8'
    self.n.tops[feature_name] = L.InnerProduct(
        self.n.tops[param_str['image_data_key']],
        num_output=1000,
        weight_filler=self.uniform_weight_filler(-.08, .08),
        bias_filler=self.constant_filler(0),
        param=self.init_params([[1, 1], [2, 0]]))
    if self.cc:  # If class conditional
        data_top = self.n.tops['fc8']
        class_top = self.n.tops[param_str['data_label_feat']]
        self.n.tops['class_input'] = L.Concat(data_top, class_top, axis=1)
        data_input = 'class_input'
    else:
        self.silence(self.n.tops[param_str['data_label_feat']])
    bottom_sent = self.n.tops[param_str['text_data_key']]
    bottom_cont = self.n.tops[param_str['text_marker_key']]

    # prep for caption model
    bottom_cont_slice = L.Slice(bottom_cont, ntop=self.T, axis=0)
    self.rename_tops(bottom_cont_slice,
                     ['bottom_cont_%d' % i for i in range(self.T)])
    if not self.separate_sents:
        bottom_sent_slice = L.Slice(bottom_sent, ntop=self.T, axis=0)
        self.rename_tops(bottom_sent_slice,
                         ['input_sent_%d' % i for i in range(self.T)])
        target_sentence = self.n.tops['target_sentence']
    else:
        bottom_sents = L.Slice(bottom_sent, slice_point=[self.slice_point],
                               axis=1, ntop=2)
        self.rename_tops(bottom_sents, ['reg_input_sent', 'rl_input_sent'])
        reg_bottom_sents_slice = L.Slice(self.n.tops['reg_input_sent'],
                                         axis=0, ntop=20)
        rl_bottom_sents_slice = L.Slice(self.n.tops['rl_input_sent'],
                                        axis=0, ntop=20)
        self.silence([rl_bottom_sents_slice[i] for i in range(1, self.T)])
        self.n.tops['input_sent_0'] = L.Concat(reg_bottom_sents_slice[0],
                                               rl_bottom_sents_slice[0],
                                               axis=1)
        self.rename_tops(reg_bottom_sents_slice,
                         ['reg_input_sent_%d' % i for i in range(1, self.T)])
        self.rename_tops(reg_bottom_sents_slice,
                         ['reg_input_sent_%d' % i for i in range(self.T)])
        slice_target_sentence = L.Slice(self.n.tops['target_sentence'],
                                        slice_point=[self.slice_point],
                                        axis=1, ntop=2)
        self.rename_tops(slice_target_sentence,
                         ['reg_target_sentence', 'rl_target_sentence'])
        self.silence(self.n.tops['rl_target_sentence'])
        target_sentence = self.n.tops['reg_target_sentence']

    self.n.tops['lstm1_h0'] = self.dummy_data_layer(
        [1, self.N, self.lstm_dim], 0)
    self.n.tops['lstm1_c0'] = self.dummy_data_layer(
        [1, self.N, self.lstm_dim], 0)
    self.n.tops['lstm2_h0'] = self.dummy_data_layer(
        [1, self.N, self.lstm_dim], 0)
    self.n.tops['lstm2_c0'] = self.dummy_data_layer(
        [1, self.N, self.lstm_dim], 0)
    self.make_caption_model(static_input=data_input)

    # prep bottoms for loss
    predict_tops = [self.n.tops['predict_%d' % i] for i in range(self.T)]
    self.n.tops['predict_concat'] = L.Concat(*predict_tops, axis=0)
    if self.separate_sents:
        word_sample_tops = [self.n.tops['rl_word_sample_reshape_%d' % i]
                            for i in range(1, self.T + 1)]
        self.n.tops['word_sample_concat'] = L.Concat(*word_sample_tops,
                                                     axis=0)
        concat_predict_tops = L.Slice(self.n.tops['predict_concat'],
                                      slice_point=[self.slice_point],
                                      axis=1, ntop=2)
        reg_predict = concat_predict_tops[0]
        RL_predict = concat_predict_tops[1]
        bottom_cont_tops = L.Slice(bottom_cont,
                                   slice_point=[self.slice_point],
                                   axis=1, ntop=2)
        self.silence(bottom_cont_tops[0])
        label_tops = L.Slice(self.n.tops[param_str['data_label']],
                             slice_point=[self.slice_point], axis=0, ntop=2)
        self.silence(label_tops[0])
        self.rename_tops([bottom_cont_tops[1], label_tops[1]],
                         ['rl_bottom_cont', 'rl_label_top'])
        label_top = self.n.tops['rl_label_top']
        bottom_cont = self.n.tops['rl_bottom_cont']
    else:
        word_sample_tops = [self.n.tops['word_sample_reshape_%d' % i]
                            for i in range(1, self.T + 1)]
        self.n.tops['word_sample_concat'] = L.Concat(*word_sample_tops,
                                                     axis=0)
        reg_predict = self.n.tops['predict_concat']
        RL_predict = self.n.tops['predict_concat']
        label_top = self.n.tops[param_str['data_label']]

    # RL loss
    if RL_loss == 'lstm_classification':
        self.n.tops['embed_classification'] = self.embed(
            self.n.tops['word_sample_concat'], 1000,
            input_dim=self.vocab_size, bias_term=False,
            learning_param=self.init_params([[0, 0]]))
        self.n.tops['lstm_classification'] = self.lstm(
            self.n.tops['embed_classification'], bottom_cont,
            learning_param_lstm=self.init_params([[0, 0], [0, 0], [0, 0]]),
            lstm_hidden=1000)
        self.n.tops['predict_classification'] = L.InnerProduct(
            self.n.tops['lstm_classification'], num_output=200, axis=2)
        self.n.tops['probs_classification'] = L.Softmax(
            self.n.tops['predict_classification'], axis=2)
        # classification reward layer: classification, word_sample_concat (to
        # get sentence length), data label should be single stream; even
        # though trained with 20 stream...
        self.n.tops['reward'] = self.python_layer(
            [self.n.tops['probs_classification'],
             self.n.tops['word_sample_concat'], label_top],
            'loss_layers', 'sequenceClassificationLoss', param_str_loss)
        self.n.tops['reward_reshape'] = L.Reshape(self.n.tops['reward'],
                                                  shape=dict(dim=[1, -1]))
        self.n.tops['reward_tile'] = L.Tile(self.n.tops['reward_reshape'],
                                            axis=0, tiles=self.T)
        # softmax with sampled words as "correct" word
        self.n.tops['sample_loss'] = self.softmax_per_inst_loss(
            RL_predict, self.n.tops['word_sample_concat'], axis=2)
        self.n.tops['sample_reward'] = L.Eltwise(self.n.tops['sample_loss'],
                                                 self.n.tops['reward_tile'],
                                                 propagate_down=[1, 0],
                                                 operation=0)
        avoid_lw = 100
        self.n.tops['normalized_reward'] = L.Power(
            self.n.tops['sample_reward'], scale=(1. / self.N) * avoid_lw)
        self.n.tops['sum_rewards'] = L.Reduction(
            self.n.tops['normalized_reward'], loss_weight=[1])

    self.n.tops['sentence_loss'] = self.softmax_loss(reg_predict,
                                                     target_sentence, axis=2,
                                                     loss_weight=20)
    self.write_net(save_name)
def test_power(self):
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    n.pow1 = L.Power(n.input1, power=2.0, scale=0.5, shift=0.01)
    self._test_model(*self._netspec_to_model(n, 'power'))