def smooth_loss3(pred, canny, spixel_init, l_weight):
    """Build a smoothness loss weighted by both ``spixel_init`` and ``canny``.

    The outputs of ``gradient_x`` / ``gradient_y`` on ``pred`` are multiplied
    element-wise, first by ``weight_edges2`` of the ``spixel_init`` gradients
    (size argument 9, ``power=0.0001``) and then by ``weight_edges2`` of the
    cropped ``canny`` blob (size argument 9). Each product is summed to a
    scalar and the two scalars are added.

    Args:
        pred: Blob passed through ``gradient_x`` / ``gradient_y``.
        canny: Blob passed through ``crop_x`` / ``crop_y``.
        spixel_init: Blob whose gradients produce the first set of weights.
        l_weight: Forwarded as ``loss_weight`` of the final Eltwise layer.

    Returns:
        The top of the final Eltwise SUM layer.
    """
    def _times(blob, weight):
        # Element-wise product of a blob and its weight map.
        return L.Eltwise(blob, weight, operation=P.Eltwise.PROD)

    def _total(blob):
        # Collapse a blob to a single summed value.
        return L.Reduction(
            blob, reduction_param=dict(operation=P.Reduction.SUM))

    init_dx = gradient_x(spixel_init)
    init_dy = gradient_y(spixel_init)
    pred_dx = gradient_x(pred)
    pred_dy = gradient_y(pred)

    init_wx = weight_edges2(init_dx, 9, power=0.0001)
    init_wy = weight_edges2(init_dy, 9, power=0.0001)
    pred_dx_w = _times(pred_dx, init_wx)
    pred_dy_w = _times(pred_dy, init_wy)

    edges_x = crop_x(canny)
    edges_y = crop_y(canny)
    edge_wx = weight_edges2(edges_x, 9)
    edge_wy = weight_edges2(edges_y, 9)

    smooth_x = _times(pred_dx_w, edge_wx)
    smooth_y = _times(pred_dy_w, edge_wy)
    sum_x = _total(smooth_x)
    sum_y = _total(smooth_y)

    return L.Eltwise(sum_x, sum_y,
                     operation=P.Eltwise.SUM,
                     loss_weight=l_weight)
def bn_model_caffe(request, tmpdir):
    """Same as bn_model but with Caffe.

    Builds a net with a ``1 x channels x 5 x 5`` input, two Reduction layers
    (operation 4 over axis 3, then axis 2) and a SoftmaxWithLoss, writes it as
    a prototxt below ``tmpdir`` and returns it wrapped in a ``CaffeModel`` with
    bounds ``(0, 1)``. The channel/class count is ``request.param`` when
    present, otherwise 1000.
    """
    import caffe
    from caffe import layers as L

    def _reduce_mean(blob, axis):
        # operation 4 is MEAN in Caffe's ReductionParameter enum.
        return L.Reduction(blob,
                           reduction_param={"operation": 4, "axis": axis})

    bounds = (0, 1)
    num_classes = channels = getattr(request, "param", 1000)

    spec = caffe.NetSpec()
    spec.data = L.Input(name="data", shape=dict(dim=[1, channels, 5, 5]))
    spec.reduce_1 = _reduce_mean(spec.data, 3)
    spec.output = _reduce_mean(spec.reduce_1, 2)
    spec.label = L.Input(name="label", shape=dict(dim=[1]))
    spec.loss = L.SoftmaxWithLoss(spec.output, spec.label)

    proto_dir = tmpdir.mkdir("test_models_caffe_fixture")
    proto_file = proto_dir.join("test_caffe_{}.prototxt".format(num_classes))
    # force_backward makes Caffe compute gradients down to the input blob.
    proto_file.write("force_backward: true\n" + str(spec.to_proto()))

    loaded = caffe.Net(str(proto_file), caffe.TEST)
    return CaffeModel(loaded, bounds=bounds)
def smooth_loss(pred, img, l_weight):
    """Build a smoothness loss on ``pred`` weighted by ``img``.

    The ``gradient_x`` / ``gradient_y`` outputs for ``pred`` are multiplied
    element-wise by ``weight_edges`` of the matching ``img`` gradient, each
    product is summed to a scalar, and the two scalars are added.

    Args:
        pred: Blob passed through ``gradient_x`` / ``gradient_y``.
        img: Blob whose gradients are fed to ``weight_edges``.
        l_weight: Forwarded as ``loss_weight`` of the final Eltwise layer.

    Returns:
        The top of the final Eltwise SUM layer.
    """
    sum_all = dict(operation=P.Reduction.SUM)

    img_dx = gradient_x(img)
    img_dy = gradient_y(img)
    pred_dx = gradient_x(pred)
    pred_dy = gradient_y(pred)

    w_x = weight_edges(img_dx)
    w_y = weight_edges(img_dy)

    weighted_x = L.Eltwise(pred_dx, w_x, operation=P.Eltwise.PROD)
    weighted_y = L.Eltwise(pred_dy, w_y, operation=P.Eltwise.PROD)

    total_x = L.Reduction(weighted_x, reduction_param=dict(sum_all))
    total_y = L.Reduction(weighted_y, reduction_param=dict(sum_all))

    return L.Eltwise(total_x, total_y,
                     operation=P.Eltwise.SUM,
                     loss_weight=l_weight)
def test_caffe_model_forward_gradient(tmpdir):
    """Compare the CaffeModel gradient against a finite difference of the loss.

    A step of ``epsilon`` along the returned gradient should change the loss by
    roughly ``epsilon * ||gradient||^2``; both sides are scaled by 1e4 before
    being compared to one decimal place.
    """
    import caffe
    from caffe import layers as L

    def _reduce_mean(blob, axis):
        # operation 4 is MEAN in Caffe's ReductionParameter enum.
        return L.Reduction(blob,
                           reduction_param={"operation": 4, "axis": axis})

    bounds = (0, 255)
    channels = num_classes = 1000

    spec = caffe.NetSpec()
    spec.data = L.Input(name="data",
                        shape=dict(dim=[1, num_classes, 5, 5]))
    spec.reduce_1 = _reduce_mean(spec.data, 3)
    spec.output = _reduce_mean(spec.reduce_1, 2)
    spec.label = L.Input(name="label", shape=dict(dim=[1]))
    spec.loss = L.SoftmaxWithLoss(spec.output, spec.label)

    proto_name = "test_caffe_model_gradient_proto_{}.prototxt".format(
        num_classes)
    proto_file = tmpdir.mkdir("test_models_caffe").join(proto_name)
    proto_file.write("force_backward: true\n" + str(spec.to_proto()))

    # The random part of the preprocessing tuple is drawn before the seed
    # below is set.
    pre_first = np.arange(num_classes)[:, None, None]
    pre_second = np.random.uniform(size=(channels, 5, 5)) + 1
    preprocessing = (pre_first, pre_second)

    loaded = caffe.Net(str(proto_file), caffe.TEST)
    model = CaffeModel(loaded, bounds=bounds, preprocessing=preprocessing)

    epsilon = 1e-2
    np.random.seed(23)
    images = np.random.rand(5, channels, 5, 5).astype(np.float32)
    labels = [7] * 5

    _, grad = model.forward_and_gradient_one(images, labels)

    half_step = epsilon / 2 * grad
    loss_minus = model._loss_fn(images - half_step, labels)
    loss_plus = model._loss_fn(images + half_step, labels)
    scaled_change = 1e4 * (loss_plus - loss_minus)

    assert np.all(scaled_change > 1)

    # make sure that gradient is numerically correct
    grad_norm = np.linalg.norm(
        grad.reshape(len(grad), -1, grad.shape[-1]), axis=(1, 2))
    np.testing.assert_array_almost_equal(
        scaled_change,
        1e4 * epsilon * grad_norm**2,
        decimal=1,
    )
def context_supervision_loss(self, distance, lw=1, ind_loss=None):
    """Ranking loss between the ground-truth slice and every context slice.

    Distance is positive; want gt distance to be SMALLER than other distances.
    ``distance`` is split into 21 slices along axis 1. Slice 0 is treated as
    the ground truth; for each of the remaining 20 slices the hinge
    ``relu(gt - other + self.margin)`` is averaged, optionally after being
    multiplied element-wise by ``ind_loss``. The 20 averages are summed and
    scaled by 1/21.

    Args:
        distance: Blob that is split into 21 slices along axis 1.
        lw: Loss weight attached to the final Power layer.
        ind_loss: Optional blob multiplied into each hinge term after the
            term is reshaped to ``[self.batch_size, 1]``.

    Returns:
        The top of the final Power layer carrying the loss weight.
    """
    pieces = L.Slice(distance, ntop=21, axis=1)
    gt = pieces[0]
    setattr(self.n, 'gt_slice', gt)

    per_context = []
    for i, other in enumerate(pieces[1:], start=1):
        setattr(self.n, 'context_slice_%d' % i, other)

        # gt - other + margin, clipped at zero (Eltwise operation 1 is SUM).
        minus_other = L.Power(other, scale=-1)
        gap = L.Eltwise(gt, minus_other, operation=1)
        gap_margin = L.Power(gap, shift=self.margin)
        hinge = L.ReLU(gap_margin, in_place=False)

        if ind_loss:
            hinge = L.Reshape(hinge,
                              shape=dict(dim=[self.batch_size, 1]))
            # Eltwise operation 0 is PROD.
            hinge = L.Eltwise(hinge, ind_loss, operation=0)

        setattr(self.n, 'max_sum_margin_relu_%d' % i, hinge)
        # Reduction operation 4 is MEAN.
        per_context.append(L.Reduction(hinge, operation=4))

    summed = L.Eltwise(*per_context, operation=1)
    # NOTE(review): 20 terms are summed but the scale is 1/21 -- confirm
    # whether that is intended.
    return L.Power(summed, scale=1 / 21., loss_weight=[lw])
def smooth_loss2(pred, l_weight):
    """Build an unweighted smoothness loss from the gradients of ``pred``.

    The outputs of ``gradient_x`` and ``gradient_y`` are each summed to a
    scalar and the two scalars are added.

    Args:
        pred: Blob passed through ``gradient_x`` / ``gradient_y``.
        l_weight: Forwarded as ``loss_weight`` of the final Eltwise layer.

    Returns:
        The top of the final Eltwise SUM layer.
    """
    def _total(blob):
        return L.Reduction(
            blob, reduction_param=dict(operation=P.Reduction.SUM))

    grad_x = gradient_x(pred)
    grad_y = gradient_y(pred)
    total_x = _total(grad_x)
    total_y = _total(grad_y)
    return L.Eltwise(total_x, total_y,
                     operation=P.Eltwise.SUM,
                     loss_weight=l_weight)
def test_reduce4(self):
    """Exercise a MEAN Reduction over axis 3 with coefficient 1.3."""
    spec = caffe.NetSpec()
    spec.input1 = L.Input(shape=make_shape([10, 3, 64, 64]))
    reduce_kwargs = dict(operation=P.Reduction.MEAN, axis=3, coeff=1.3)
    spec.pooling1 = L.Reduction(spec.input1, **reduce_kwargs)
    # NOTE(review): the model is registered as 'reduce3' although this is
    # test_reduce4 -- confirm the name is intentional.
    model_args = self._netspec_to_model(spec, 'reduce3')
    self._test_model(*model_args)
def l2normed(self, vec, dim):
    """Return L2-normalized instances of ``vec``.

    For each instance x in vec, computes x / ((x ** 2).sum() ** 0.5), with
    1e-12 added to the sum of squares before the power is taken.
    Assumes vec has shape N x dim.

    Args:
        vec: Blob to normalize along axis 1.
        dim: Number of tiles used to expand the per-instance factor back
            along axis 1.

    Returns:
        The top of the Eltwise PROD layer holding the normalized blob.
    """
    sum_sq = L.Reduction(vec, axis=1, operation=P.Reduction.SUMSQ)
    # Power computes (shift + scale * x) ** power, i.e. (1e-12 + x) ** -0.5.
    inv_norm = L.Power(sum_sq, power=(-0.5), shift=1e-12)
    # Append a trailing axis of size 1 so the factor can be tiled.
    inv_norm_col = L.Reshape(inv_norm, num_axes=0, axis=-1,
                             shape=dict(dim=[1]))
    inv_norm_full = L.Tile(inv_norm_col, axis=1, tiles=dim)
    return L.Eltwise(vec, inv_norm_full, operation=P.Eltwise.PROD)
def normalize(self, bottom, axis=1, numtiles=4096):
    """Scale ``bottom`` by the inverse L2 norm taken from ``axis`` onward.

    The blob is squared, summed with a Reduction starting at ``axis``, raised
    to the power -0.5 (with 0.00001 added first), reshaped, tiled ``numtiles``
    times along ``axis`` and multiplied back into ``bottom``.

    Args:
        bottom: Blob to normalize.
        axis: Axis to normalize over; only 1 and 2 are supported.
        numtiles: Number of tiles used to expand the factor along ``axis``.

    Returns:
        The top of the Eltwise PROD layer.

    Raises:
        ValueError: If ``axis`` is neither 1 nor 2. Previously this case
            failed late with an unbound local variable.
    """
    # Resolve the reshape target first so that an unsupported axis is
    # rejected before any layer is created.
    if axis == 1:
        reshape_dims = [-1, 1]
    elif axis == 2:
        reshape_dims = [self.batch_size, -1, 1]
    else:
        raise ValueError(
            "normalize only supports axis 1 or 2, got %r" % (axis,))

    power = L.Power(bottom, power=2)
    # Reduction operation 1 is SUM.
    power_sum = L.Reduction(power, axis=axis, operation=1)
    # (0.00001 + sum) ** -0.5; the shift keeps the result finite for zeros.
    inv_norm = L.Power(power_sum, power=-0.5, shift=0.00001)
    reshape = L.Reshape(inv_norm, shape=dict(dim=reshape_dims))
    tile = L.Tile(reshape, axis=axis, tiles=numtiles)
    # Eltwise operation 0 is PROD.
    return L.Eltwise(tile, bottom, operation=0)
def l1_loss(bottom1, bottom2, l_weight):
    """Build a loss equal to the sum of ``|bottom1 - bottom2|``.

    Args:
        bottom1: First blob.
        bottom2: Blob subtracted from ``bottom1``.
        l_weight: Forwarded as ``loss_weight`` of the Reduction layer.

    Returns:
        The top of the Reduction SUM layer.
    """
    # Eltwise SUM with coefficients [1, -1] yields bottom1 - bottom2.
    subtract = dict(operation=P.Eltwise.SUM, coeff=[1, -1])
    residual = L.Eltwise(bottom1, bottom2, eltwise_param=subtract)
    magnitude = L.AbsVal(residual)
    return L.Reduction(magnitude,
                       reduction_param=dict(operation=P.Reduction.SUM),
                       loss_weight=l_weight)
def tall_loss(self, positive, negative, query, lw=1):
    """Mean of ``log(1 + exp(-s_p))`` and ``log(1 + exp(s_n))`` terms.

    ``s_p`` and ``s_n`` are the outputs of ``self.distance_function`` for the
    positive and negative inputs against ``query``. Both groups of terms are
    scaled by a coefficient of 1, concatenated along axis 0 and averaged.

    Args:
        positive: Positive input for ``self.distance_function``.
        negative: Negative input for ``self.distance_function``.
        query: Second argument for both ``self.distance_function`` calls.
        lw: Loss weight attached to the final Reduction layer.

    Returns:
        The top of the Reduction layer carrying the loss weight.
    """
    pos_scores = self.distance_function(positive, query)
    neg_scores = self.distance_function(negative, query)

    # Coefficients applied to the positive and negative terms.
    alpha_c = 1
    alpha_w = 1

    pos_exp = L.Exp(pos_scores, scale=-1)
    neg_exp = L.Exp(neg_scores)
    # Log with shift=1 computes log(1 + x).
    pos_log = L.Log(pos_exp, shift=1)
    neg_log = L.Log(neg_exp, shift=1)
    pos_term = L.Power(pos_log, scale=alpha_c)
    neg_term = L.Power(neg_log, scale=alpha_w)

    stacked = L.Concat(pos_term, neg_term, axis=0)
    # Reduction operation 4 is MEAN.
    return L.Reduction(stacked, operation=4, loss_weight=[lw])
def ranking_loss(self, p, n, t, lw=1):
    """Mean hinge loss ``relu(d(p, t) - d(n, t) + self.margin)``.

    Args:
        p: First input for the positive ``self.distance_function`` call.
        n: First input for the negative ``self.distance_function`` call.
        t: Second input for both ``self.distance_function`` calls.
        lw: Loss weight attached to the final Reduction layer.

    Returns:
        The top of the Reduction layer carrying the loss weight.
    """
    # I <3 Caffe - this is not obnoxious to write at all.
    pos_dist = self.distance_function(p, t)
    neg_dist = self.distance_function(n, t)

    # Subtraction is spelled as "negate, then Eltwise SUM (operation 1)".
    minus_neg = L.Power(neg_dist, scale=-1)
    gap = L.Eltwise(pos_dist, minus_neg, operation=1)
    gap_plus_margin = L.Power(gap, shift=self.margin)
    hinge = L.ReLU(gap_plus_margin, in_place=False)

    # Reduction operation 4 is MEAN.
    return L.Reduction(hinge, operation=4, loss_weight=[lw])
def relational_ranking_loss(self, distance_p, distance_n, lw=1):
    """Mean hinge loss ``relu(distance_p - distance_n + self.margin)``.

    This function assumes you want to MINIMIZE distances.

    Args:
        distance_p: Distance blob for the positive pairing.
        distance_n: Distance blob for the negative pairing.
        lw: Loss weight attached to the final Reduction layer.

    Returns:
        The top of the Reduction layer carrying the loss weight.
    """
    flipped_n = L.Power(distance_n, scale=-1)
    # Eltwise operation 1 is SUM, so this is distance_p - distance_n.
    difference = L.Eltwise(distance_p, flipped_n, operation=1)
    shifted = L.Power(difference, shift=self.margin)
    clipped = L.ReLU(shifted, in_place=False)
    # Reduction operation 4 is MEAN.
    return L.Reduction(clipped, operation=4, loss_weight=[lw])
def smooth_loss4(pred, canny, l_weight):
    """Build a smoothness loss on ``pred`` weighted by the ``canny`` blob.

    The ``gradient_x`` / ``gradient_y`` outputs for ``pred`` are multiplied
    element-wise by ``weight_edges2`` (size argument 5) of the cropped
    ``canny`` blob, each product is summed to a scalar, and the two scalars
    are added.

    Args:
        pred: Blob passed through ``gradient_x`` / ``gradient_y``.
        canny: Blob passed through ``crop_x`` / ``crop_y``.
        l_weight: Forwarded as ``loss_weight`` of the final Eltwise layer.

    Returns:
        The top of the final Eltwise SUM layer.
    """
    def _weighted_total(grad, weight):
        product = L.Eltwise(grad, weight, operation=P.Eltwise.PROD)
        return product

    grad_x = gradient_x(pred)
    grad_y = gradient_y(pred)
    edges_x = crop_x(canny)
    edges_y = crop_y(canny)
    w_x = weight_edges2(edges_x, 5)
    w_y = weight_edges2(edges_y, 5)

    smooth_x = _weighted_total(grad_x, w_x)
    smooth_y = _weighted_total(grad_y, w_y)

    total_x = L.Reduction(
        smooth_x, reduction_param=dict(operation=P.Reduction.SUM))
    total_y = L.Reduction(
        smooth_y, reduction_param=dict(operation=P.Reduction.SUM))

    return L.Eltwise(total_x, total_y,
                     operation=P.Eltwise.SUM,
                     loss_weight=l_weight)
def ranking_loss(self, p, n, t, lw=1):
    """Build the ranking loss (the one used in the paper).

    Computes the mean of ``relu(d(p, t) - d(n, t) + self.margin)`` where ``d``
    is ``self.distance_function``.

    Args:
        p: First input for the positive distance.
        n: First input for the negative distance.
        t: Second input shared by both distances.
        lw: Loss weight attached to the final Reduction layer.

    Returns:
        The top of the Reduction layer carrying the loss weight.
    """
    d_pos = self.distance_function(p, t)
    d_neg = self.distance_function(n, t)

    margin_shift = dict(shift=self.margin)
    mean_with_weight = dict(operation=4, loss_weight=[lw])  # 4 == MEAN

    negated = L.Power(d_neg, scale=-1)
    diff = L.Eltwise(d_pos, negated, operation=1)  # 1 == SUM
    diff_margin = L.Power(diff, **margin_shift)
    rectified = L.ReLU(diff_margin, in_place=False)
    return L.Reduction(rectified, **mean_with_weight)
def l2normed(dim):
    """Return the proto of a net that L2-normalizes the data layer output.

    Returns L2-normalized instances of vec; i.e., for each instance x in vec,
    computes x / ((x ** 2).sum() ** 0.5). Assumes vec has shape N x dim.

    Args:
        dim: Number of tiles used to expand the per-instance factor along
            axis 1.

    Returns:
        The NetParameter produced by ``NetSpec.to_proto()``.
    """
    spec = caffe.NetSpec()
    spec.data, spec.label = L.Python(module='layers',
                                     layer='tripletDataLayer',
                                     ntop=2)

    spec.denom = L.Reduction(spec.data, axis=1,
                             operation=P.Reduction.SUMSQ)
    # For numerical stability: a tiny shift is added before the -0.5 power.
    spec.power = L.Power(spec.denom, power=(-0.5), shift=1e-12)
    # Append a trailing axis of size 1 so the factor can be tiled.
    spec.reshape = L.Reshape(spec.power, num_axes=0, axis=-1,
                             shape=dict(dim=[1]))
    spec.tile = L.Tile(spec.reshape, axis=1, tiles=dim)
    spec.elwise = L.Eltwise(spec.data, spec.tile,
                            operation=P.Eltwise.PROD)
    return spec.to_proto()
def _code_regularization(self, lCW):
    """Add the train-phase codeword regularizer to ``self.netspec``.

    Four TRAIN-only layers are registered: a DummyData blob for the semantic
    codewords, a square DummyData blob fed to a bias-free InnerProduct whose
    weight is named ``lCW``, their difference, and a SUMSQ Reduction over that
    difference weighted by ``self.code_coeff``.

    Args:
        lCW: Parameter name given to the InnerProduct weight.
    """
    ns = self.netspec

    def _train_only():
        # Fresh include rule per layer, as in a hand-written spec.
        return dict(phase=caffe.TRAIN)

    # Semantic codes. Needs to be initialized.
    n_rows = sum(self.code_dim)
    if self.semantics == ATTRIBUTES:
        n_cols = len(self.train_classes)
    else:
        n_cols = sum(self.num_states)
    code_shape = [n_rows, n_cols]

    name = 'SCoRe/cwReg/codewords'
    sem_cw = L.DummyData(name=name,
                         shape=dict(dim=code_shape),
                         include=_train_only())
    ns[name] = sem_cw

    # Classification codes.
    name = 'SCoRe/cwReg/eye'
    eye = L.DummyData(name=name,
                      shape=dict(dim=[code_shape[0], code_shape[0]]),
                      include=_train_only())
    ns[name] = eye

    name = 'SCoRe/cwReg/cls_codewords'
    clf_cw = L.InnerProduct(eye,
                            name=name,
                            num_output=code_shape[1],
                            bias_term=False,
                            param=[{'name': lCW}],
                            include=_train_only())
    ns[name] = clf_cw

    # Compute \sum |S-C|^2
    name = 'SCoRe/cwReg/diff'
    x_diff = L.Eltwise(sem_cw, clf_cw,
                       name=name,
                       operation=P.Eltwise.SUM,
                       coeff=[1., -1.],
                       include=_train_only())
    ns[name] = x_diff

    name = 'SCoRe/cwReg'
    ns[name] = L.Reduction(x_diff,
                           name=name,
                           operation=P.Reduction.SUMSQ,
                           axis=0,
                           loss_weight=self.code_coeff,
                           include=_train_only())
def pool_distances(self, vec, minimum_distance=True):
    """Pool a blob of 21 distances per instance into one value per instance.

    Want to MINIMIZE distance; negate, maximize, then negate (again).
    Assumes that scores are an N x 21 blob.

    The pooling type is read from ``self.args.pool_type`` for both the branch
    selection and the pooling operator lookup; previously the branch used a
    free ``args`` name while the lookup used ``self.args``.

    Args:
        vec: Blob of distances.
        minimum_distance: For 'max' / 'average' pooling, negate before and
            after pooling so that the pooled value follows the smallest
            distance.

    Returns:
        The pooled blob.

    Raises:
        ValueError: If the pooling type is not 'max', 'average' or 'sum'.
    """
    pool_type = self.args.pool_type

    if pool_type in ('max', 'average'):
        prep_pool = L.Reshape(vec,
                              shape=dict(dim=[self.batch_size, 1, 21, 1]))
        if minimum_distance:
            prep_pool = L.Power(prep_pool, scale=-1)
        max_pool = L.Pooling(prep_pool,
                             pool=pooling_type[pool_type],
                             kernel_h=21,
                             kernel_w=1)
        pool = L.Reshape(max_pool, shape=dict(dim=[self.batch_size]))
        if minimum_distance:
            pool = L.Power(pool, scale=-1)
    elif pool_type == 'sum':
        # untested
        # NOTE(review): this branch negates regardless of minimum_distance
        # and does not negate back -- confirm intended.
        negative = L.Power(vec, scale=-1)
        pool = L.Reduction(negative, axis=1, operation=1)  # sum
    else:
        raise ValueError("You did not select a valid pooling type.")
    return pool
def compile_time_operation(self, learning_option, cluster):
    """Define the reduction operation for the input blob.

    Reads the ``operation`` (required), ``axis`` and ``scale`` attributes,
    creates a Caffe Reduction layer on the ``input`` blob and records the
    layer and its dimension as ``output``.

    Args:
        learning_option: Not used by this method.
        cluster: Not used by this method.

    Raises:
        Exception: If the ``operation`` attribute is not declared.
    """
    # get input
    input_ = self.get_input('input')
    indim = self.get_dimension('input')

    # get attr
    # required field
    op = self.get_attr('operation', default=None)
    if op is None:
        raise Exception(
            '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                'operation', self.name))

    # optional field
    axis = self.get_attr('axis', default=None)
    if axis is None:
        # Caffe's ReductionParameter defaults axis to 0; an undeclared axis
        # previously crashed when it was used as a list index below.
        axis = 0
    scale = float(self.get_attr('scale', default=1.0))

    # get output dimension
    # Work on a copy so the dimension list owned by the input is untouched.
    outdim = list(indim)
    if axis == len(outdim):
        outdim.pop()
    else:
        # NOTE(review): Caffe's Reduction collapses every axis from `axis`
        # onward, while this keeps the rank and sets one entry to 1 --
        # confirm what downstream layers expect.
        outdim[axis] = 1

    reduction = L.Reduction(input_,
                            name=self.name,
                            operation=op,
                            axis=axis,
                            coeff=scale)

    # set output
    self.set_output('output', reduction)
    self.set_dimension('output', outdim)
def test_caffe_model_preprocessing_shape_change(tmpdir):
    """Check a preprocessing callable that changes the input layout.

    One model takes channel-first images directly; a second model wraps the
    same net with a preprocessing function that transposes channel-last input
    and returns a matching gradient transform. Predictions and gradients of
    the two models must agree.
    """
    import caffe
    from caffe import layers as L

    def _reduce_mean(blob, axis):
        # operation 4 is MEAN in Caffe's ReductionParameter enum.
        return L.Reduction(blob,
                           reduction_param={"operation": 4, "axis": axis})

    bounds = (0, 255)
    channels = num_classes = 1000

    spec = caffe.NetSpec()
    spec.data = L.Input(name="data",
                        shape=dict(dim=[1, num_classes, 5, 5]))
    spec.reduce_1 = _reduce_mean(spec.data, 3)
    spec.output = _reduce_mean(spec.reduce_1, 2)
    spec.label = L.Input(name="label", shape=dict(dim=[1]))
    spec.loss = L.SoftmaxWithLoss(spec.output, spec.label)

    proto_name = \
        "test_caffe_model_preprocessing_shape_change_{}.prototxt".format(
            num_classes)
    proto_file = tmpdir.mkdir("test_models_caffe").join(proto_name)
    proto_file.write("force_backward: true\n" + str(spec.to_proto()))

    net = caffe.Net(str(proto_file), caffe.TEST)
    model1 = CaffeModel(net, bounds=bounds)

    def preprocessing2(x):
        # Move the trailing channel axis in front of the spatial axes.
        to_channel_first = {3: (2, 0, 1), 4: (0, 3, 1, 2)}
        order = to_channel_first.get(x.ndim)
        if order is not None:
            x = np.transpose(x, axes=order)

        def grad(dmdp):
            assert dmdp.ndim == 3
            return np.transpose(dmdp, axes=(1, 2, 0))

        return x, grad

    model2 = CaffeModel(net, bounds=bounds, preprocessing=preprocessing2)

    np.random.seed(22)
    images_nhwc = np.random.rand(2, 5, 5, channels).astype(np.float32)
    images_nchw = np.transpose(images_nhwc, (0, 3, 1, 2))

    batch_1 = model1.forward(images_nchw)
    batch_2 = model2.forward(images_nhwc)
    assert np.all(batch_1 == batch_2)

    single_1 = model1.forward_one(images_nchw[0])
    single_2 = model2.forward_one(images_nhwc[0])
    assert np.all(single_1 == single_2)

    grad_1 = model1.gradient_one(images_nchw[0], 3)
    assert grad_1.ndim == 3
    grad_1 = np.transpose(grad_1, (1, 2, 0))
    grad_2 = model2.gradient_one(images_nhwc[0], 3)

    np.testing.assert_array_almost_equal(grad_1, grad_2)
def CaffeTrackerNet(net, from_layer="data", label_layer="label"):
    """Append a CaffeNet-style tracker body, regression head and loss to ``net``.

    Layers conv1..conv3 use learning-rate multipliers of 0; conv4 and conv5 are
    not built, so pool5 reads directly from relu3. After the ``Halfmerge``
    layer come three 4096-wide fully connected stages with ReLU and dropout
    and a 4-output layer. The loss is a Reduction with operation 2 (ASUM in
    Caffe's enum) over ``fc8`` plus the negated, flattened label.

    Args:
        net: NetSpec that already contains ``from_layer`` and ``label_layer``.
        from_layer: Name of the blob fed to conv1.
        label_layer: Name of the ground-truth blob.

    Returns:
        The same ``net`` with the new layers attached.
    """
    def _max_pool(bottom):
        return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=3, stride=2)

    def _lrn(bottom):
        return L.LRN(bottom,
                     lrn_param=dict(local_size=5, alpha=0.0001, beta=0.75))

    def _dropout(bottom):
        return L.Dropout(bottom,
                         in_place=True,
                         dropout_param=dict(dropout_ratio=0.5))

    def _fc_settings(weight_std, bias_value):
        return {
            'param': [dict(lr_mult=10, decay_mult=1),
                      dict(lr_mult=20, decay_mult=0)],
            'weight_filler': dict(type='gaussian', std=weight_std),
            'bias_filler': dict(type='constant', value=bias_value),
        }

    # CaffeNet convolutions, with lr_mult=0 on weights and biases.
    conv_settings = {
        'param': [dict(lr_mult=0, decay_mult=1),
                  dict(lr_mult=0, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0),
    }

    # conv1 -> pool1 -> norm1
    net.conv1 = L.Convolution(net[from_layer], num_output=96, stride=4,
                              kernel_size=11, **conv_settings)
    net.relu1 = L.ReLU(net.conv1, in_place=True)
    net.pool1 = _max_pool(net.relu1)
    net.norm1 = _lrn(net.pool1)

    # conv2 -> pool2 -> norm2
    net.conv2 = L.Convolution(net.norm1, num_output=256, pad=2, group=2,
                              kernel_size=5, **conv_settings)
    net.relu2 = L.ReLU(net.conv2, in_place=True)
    net.pool2 = _max_pool(net.relu2)
    net.norm2 = _lrn(net.pool2)

    # conv3
    net.conv3 = L.Convolution(net.norm2, num_output=384, pad=1,
                              kernel_size=3, **conv_settings)
    net.relu3 = L.ReLU(net.conv3, in_place=True)

    # conv4 / conv5 are disabled; pool5 consumes relu3 directly.
    net.pool5 = _max_pool(net.relu3)

    # HalfMerge
    net.convf = L.Halfmerge(net.pool5)

    # FC layers
    hidden_fc = _fc_settings(0.005, 1)
    net.fc6 = L.InnerProduct(net.convf, name="fc6-new1", num_output=4096,
                             **hidden_fc)
    net.relu6 = L.ReLU(net.fc6, in_place=True)
    net.drop6 = _dropout(net.relu6)

    net.fc7 = L.InnerProduct(net.drop6, name="fc7-new1", num_output=4096,
                             **hidden_fc)
    net.relu7 = L.ReLU(net.fc7, in_place=True)
    net.drop7 = _dropout(net.relu7)

    net.fc7b = L.InnerProduct(net.drop7, name="fc7-newb1", num_output=4096,
                              **hidden_fc)
    net.relu7b = L.ReLU(net.fc7b, in_place=True)
    net.drop7b = _dropout(net.relu7b)

    output_fc = _fc_settings(0.01, 0)
    net.fc8 = L.InnerProduct(net.drop7b, name="fc8-shapes1", num_output=4,
                             **output_fc)

    # GT layers: negate and flatten the label so it can be added to fc8.
    net.neg = L.Power(net[label_layer],
                      power_param=dict(power=1, scale=-1, shift=0))
    net.neg_flat = L.Flatten(net.neg, name="flatten1")

    # add (Eltwise defaults to SUM, so this is fc8 - label)
    net.out_diff = L.Eltwise(net.fc8, net.neg_flat, name="subtract1")

    # loss
    net.loss = L.Reduction(net.out_diff,
                           name="abssum1",
                           loss_weight=1,
                           reduction_param=dict(operation=2))
    return net
def test_reduce(self):
    """Exercise a SUM Reduction over axis 0."""
    spec = caffe.NetSpec()
    spec.input1 = L.Input(shape=make_shape([10, 3, 64, 64]))
    reduce_kwargs = dict(operation=P.Reduction.SUM, axis=0)
    spec.pooling1 = L.Reduction(spec.input1, **reduce_kwargs)
    model_args = self._netspec_to_model(spec, 'reduce')
    self._test_model(*model_args)
def lrcn_reinforce(self, save_name, RL_loss='lstm_classification', lw=20):
    """Build and write the caption net trained with a sampled-sentence reward.

    The net combines a softmax sentence loss on the regular stream
    (``sentence_loss``, weight 20) with a reward-weighted loss on sampled words
    (``sum_rewards``). With ``self.separate_sents`` the batch is split at
    ``self.slice_point`` into a regular half and a sampled half.

    Args:
        save_name: Passed to ``self.write_net`` once the net is assembled.
        RL_loss: Reward type; only 'lstm_classification' adds the ``reward``
            top in this method.
        lw: Accepted but not read by this method.
    """
    data_inputs = self.data_inputs
    param_str = self.param_str

    # reg sentences will be the first part of the batch
    if self.separate_sents:
        if 'batch_size' not in param_str:
            param_str['batch_size'] = 100
        # Floor division keeps the slice point an integer on Python 3 as well.
        self.slice_point = param_str['batch_size'] // 2
        self.batch_size = param_str['batch_size']

    param_str_loss = {}
    param_str_loss['vocab'] = param_str['vocabulary']
    param_str_loss['avoid_words'] = ['red', 'small']
    if self.baseline:
        param_str_loss['baseline'] = True

    data_input = 'fc8'

    data_tops = self.python_input_layer(data_inputs['module'],
                                        data_inputs['layer'], param_str)
    self.rename_tops(data_tops, data_inputs['param_str']['top_names'])

    feature_name = 'fc8'
    self.n.tops[feature_name] = L.InnerProduct(
        self.n.tops[param_str['image_data_key']],
        num_output=1000,
        weight_filler=self.uniform_weight_filler(-.08, .08),
        bias_filler=self.constant_filler(0),
        param=self.init_params([[1, 1], [2, 0]]))

    if self.cc:
        # If class conditional
        data_top = self.n.tops['fc8']
        class_top = self.n.tops[param_str['data_label_feat']]
        self.n.tops['class_input'] = L.Concat(data_top, class_top, axis=1)
        data_input = 'class_input'
    else:
        self.silence(self.n.tops[param_str['data_label_feat']])

    bottom_sent = self.n.tops[param_str['text_data_key']]
    bottom_cont = self.n.tops[param_str['text_marker_key']]

    # prep for caption model
    bottom_cont_slice = L.Slice(bottom_cont, ntop=self.T, axis=0)
    self.rename_tops(bottom_cont_slice,
                     ['bottom_cont_%d' % i for i in range(self.T)])

    if not self.separate_sents:
        bottom_sent_slice = L.Slice(bottom_sent, ntop=self.T, axis=0)
        self.rename_tops(bottom_sent_slice,
                         ['input_sent_%d' % i for i in range(self.T)])
        target_sentence = self.n.tops['target_sentence']
    else:
        bottom_sents = L.Slice(bottom_sent,
                               slice_point=[self.slice_point],
                               axis=1,
                               ntop=2)
        self.rename_tops(bottom_sents, ['reg_input_sent', 'rl_input_sent'])
        # NOTE(review): ntop is fixed at 20 here while self.T is used
        # elsewhere -- confirm the two always agree.
        reg_bottom_sents_slice = L.Slice(self.n.tops['reg_input_sent'],
                                         axis=0,
                                         ntop=20)
        rl_bottom_sents_slice = L.Slice(self.n.tops['rl_input_sent'],
                                        axis=0,
                                        ntop=20)
        self.silence([rl_bottom_sents_slice[i] for i in range(1, self.T)])
        self.n.tops['input_sent_0'] = L.Concat(reg_bottom_sents_slice[0],
                                               rl_bottom_sents_slice[0],
                                               axis=1)
        self.rename_tops(
            reg_bottom_sents_slice,
            ['reg_input_sent_%d' % i for i in range(1, self.T)])
        self.rename_tops(reg_bottom_sents_slice,
                         ['reg_input_sent_%d' % i for i in range(self.T)])
        slice_target_sentence = L.Slice(self.n.tops['target_sentence'],
                                        slice_point=[self.slice_point],
                                        axis=1,
                                        ntop=2)
        self.rename_tops(slice_target_sentence,
                         ['reg_target_sentence', 'rl_target_sentence'])
        self.silence(self.n.tops['rl_target_sentence'])
        target_sentence = self.n.tops['reg_target_sentence']

    self.n.tops['lstm1_h0'] = self.dummy_data_layer(
        [1, self.N, self.lstm_dim], 0)
    self.n.tops['lstm1_c0'] = self.dummy_data_layer(
        [1, self.N, self.lstm_dim], 0)
    self.n.tops['lstm2_h0'] = self.dummy_data_layer(
        [1, self.N, self.lstm_dim], 0)
    self.n.tops['lstm2_c0'] = self.dummy_data_layer(
        [1, self.N, self.lstm_dim], 0)

    self.make_caption_model(static_input=data_input)

    # prep bottoms for loss
    predict_tops = [self.n.tops['predict_%d' % i] for i in range(self.T)]
    self.n.tops['predict_concat'] = L.Concat(*predict_tops, axis=0)

    if self.separate_sents:
        word_sample_tops = [
            self.n.tops['rl_word_sample_reshape_%d' % i]
            for i in range(1, self.T + 1)
        ]
        self.n.tops['word_sample_concat'] = L.Concat(*word_sample_tops,
                                                     axis=0)
        concat_predict_tops = L.Slice(self.n.tops['predict_concat'],
                                      slice_point=[self.slice_point],
                                      axis=1,
                                      ntop=2)
        reg_predict = concat_predict_tops[0]
        RL_predict = concat_predict_tops[1]

        bottom_cont_tops = L.Slice(bottom_cont,
                                   slice_point=[self.slice_point],
                                   axis=1,
                                   ntop=2)
        self.silence(bottom_cont_tops[0])
        label_tops = L.Slice(self.n.tops[param_str['data_label']],
                             slice_point=[self.slice_point],
                             axis=0,
                             ntop=2)
        self.silence(label_tops[0])
        self.rename_tops([bottom_cont_tops[1], label_tops[1]],
                         ['rl_bottom_cont', 'rl_label_top'])
        label_top = self.n.tops['rl_label_top']
        bottom_cont = self.n.tops['rl_bottom_cont']
    else:
        word_sample_tops = [
            self.n.tops['word_sample_reshape_%d' % i]
            for i in range(1, self.T + 1)
        ]
        self.n.tops['word_sample_concat'] = L.Concat(*word_sample_tops,
                                                     axis=0)
        reg_predict = self.n.tops['predict_concat']
        RL_predict = self.n.tops['predict_concat']
        label_top = self.n.tops[param_str['data_label']]

    # RL loss
    if RL_loss == 'lstm_classification':
        # All learning-rate multipliers of the classifier are zero.
        self.n.tops['embed_classification'] = self.embed(
            self.n.tops['word_sample_concat'],
            1000,
            input_dim=self.vocab_size,
            bias_term=False,
            learning_param=self.init_params([[0, 0]]))
        self.n.tops['lstm_classification'] = self.lstm(
            self.n.tops['embed_classification'],
            bottom_cont,
            learning_param_lstm=self.init_params([[0, 0], [0, 0], [0, 0]]),
            lstm_hidden=1000)
        self.n.tops['predict_classification'] = L.InnerProduct(
            self.n.tops['lstm_classification'], num_output=200, axis=2)
        self.n.tops['probs_classification'] = L.Softmax(
            self.n.tops['predict_classification'], axis=2)
        # classification reward layer: classification, word_sample_concat
        # (to get sentence length),
        # data label should be single stream; even though trained with 20
        # stream...
        self.n.tops['reward'] = self.python_layer([
            self.n.tops['probs_classification'],
            self.n.tops['word_sample_concat'], label_top
        ], 'loss_layers', 'sequenceClassificationLoss', param_str_loss)

    self.n.tops['reward_reshape'] = L.Reshape(self.n.tops['reward'],
                                              shape=dict(dim=[1, -1]))
    self.n.tops['reward_tile'] = L.Tile(self.n.tops['reward_reshape'],
                                        axis=0,
                                        tiles=self.T)

    # softmax with sampled words as "correct" word
    self.n.tops['sample_loss'] = self.softmax_per_inst_loss(
        RL_predict, self.n.tops['word_sample_concat'], axis=2)
    # Eltwise operation 0 is PROD; gradients flow only into sample_loss.
    self.n.tops['sample_reward'] = L.Eltwise(self.n.tops['sample_loss'],
                                             self.n.tops['reward_tile'],
                                             propagate_down=[1, 0],
                                             operation=0)
    avoid_lw = 100
    self.n.tops['normalized_reward'] = L.Power(
        self.n.tops['sample_reward'], scale=(1. / self.N) * avoid_lw)
    self.n.tops['sum_rewards'] = L.Reduction(
        self.n.tops['normalized_reward'], loss_weight=[1])

    self.n.tops['sentence_loss'] = self.softmax_loss(reg_predict,
                                                     target_sentence,
                                                     axis=2,
                                                     loss_weight=20)
    self.write_net(save_name)
def mynet(batch, steps, loss_type, dep=False, descr=False, part='gen'):
    """Build the generator net, optionally followed by a discriminator.

    The generator is a convolution stem, ``steps`` two-convolution blocks each
    added back to its input, and a final convolution with a sigmoid.
    ``part`` selects which half is given non-zero learning-rate multipliers:
    the generator for 'gen', otherwise the discriminator.

    Args:
        batch: Batch size; half of it is used for each data source when the
            discriminator is the part being trained.
        steps: Number of two-convolution blocks in the generator.
        loss_type: 'euc' for a Euclidean loss only; any other value adds a
            sigmoid cross-entropy loss and gives the Euclidean loss weight 0.
        dep: Build a deploy net fed by an ``Input`` layer, without losses.
        descr: Append the discriminator and its loss.
        part: 'gen' or anything else, see above.

    Returns:
        The populated ``caffe.NetSpec``.
    """
    conv_lr = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=1)]
    bcnv_lr = [dict(lr_mult=1, decay_mult=1)]
    scale_lr = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=1)]
    bn_param = dict(eps=0.001, use_global_stats=False)

    # Settings with all learning-rate multipliers at zero.
    fr_lr = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    fr_clr = [dict(lr_mult=0, decay_mult=0)]
    #fr_bn = dict(eps=0.001,use_global_stats=True)
    fr_bn = dict(eps=0.001, use_global_stats=False)

    if part == 'gen':
        gen_conv_lr = conv_lr
        gen_bcnv_lr = bcnv_lr
        gen_scale_lr = scale_lr
        gen_bn_param = bn_param
        dsc_conv_lr = fr_lr
    else:
        gen_conv_lr = fr_lr
        gen_bcnv_lr = fr_clr
        gen_scale_lr = fr_lr
        gen_bn_param = fr_bn
        dsc_conv_lr = conv_lr

    # Floor division keeps batch sizes and blob dims integral on Python 3.
    half_batch = batch // 2

    n = caffe.NetSpec()
    sp = dict(bias_term=True, filler=dict(value=1.0))

    if dep:
        n.source = L.Input(input_param=dict(shape=[dict(dim=[1, 1, 64, 64])]))
    else:
        if descr:
            if part == 'gen':
                bs = batch
            else:
                bs = half_batch
        else:
            bs = batch

        n.data = L.Data(
            data_param=dict(source="db", batch_size=bs, backend=P.Data.LMDB))
        n.expected, n.source = L.Slice(n.data,
                                       slice_param=dict(axis=1,
                                                        slice_point=1),
                                       ntop=2)

        # NOTE(review): the discriminator inputs and layers are built only
        # for non-deploy nets here -- confirm against the intended layout.
        if descr:
            if part != 'gen':
                #n.data_ref = L.Split(n.expected)
                n.data_ref = L.Data(data_param=dict(
                    source="db_ref", batch_size=half_batch,
                    backend=P.Data.LMDB))
                n.label_0 = L.DummyData(shape=[dict(dim=[half_batch])],
                                        data_filler=dict(value=0.0))
                n.label_1 = L.DummyData(shape=[dict(dim=[half_batch])],
                                        data_filler=dict(value=1.0))
                n.label = L.Concat(n.label_0, n.label_1,
                                   concat_param=dict(axis=0))
            else:
                n.label = L.DummyData(shape=[dict(dim=[batch])],
                                      data_filler=dict(value=1.0))

    n.conv1 = L.Convolution(n.source,
                            convolution_param=conv_param_nb(3, 16),
                            param=gen_bcnv_lr)
    n.bn1 = L.BatchNorm(n.conv1, batch_norm_param=gen_bn_param)
    n.scale1 = L.Scale(n.bn1, scale_param=sp, param=gen_scale_lr)
    n.scale1 = L.ReLU(n.scale1)

    inp = "scale1"
    for k in range(1, steps + 1):
        cid1 = "step%d/conv1" % k
        cid2 = "step%d/conv2" % k
        bid1 = "step%d/bn1" % k
        bid2 = "step%d/bn2" % k
        eid = "step%d/elt" % k

        n[cid1] = L.Convolution(n[inp],
                                convolution_param=conv_param_nb(3, 16),
                                param=gen_bcnv_lr)
        n[bid1] = L.BatchNorm(n[cid1], batch_norm_param=gen_bn_param)
        n[bid1] = L.Scale(n[bid1], scale_param=sp, param=gen_scale_lr)
        n[bid1] = L.ReLU(n[bid1])

        n[cid2] = L.Convolution(n[bid1],
                                convolution_param=conv_param_nb(3, 16),
                                param=gen_bcnv_lr)
        n[bid2] = L.BatchNorm(n[cid2], batch_norm_param=gen_bn_param)
        n[bid2] = L.Scale(n[bid2], scale_param=sp, param=gen_scale_lr)
        n[bid2] = L.ReLU(n[bid2])

        # Add the block output to the block input.
        n[eid] = L.Eltwise(n[bid2], n[inp])
        inp = eid

    outname = "topconv"
    n[outname] = L.Convolution(n[inp],
                               convolution_param=conv_param(3, 1),
                               param=gen_conv_lr)
    n.generated = L.Sigmoid(n.topconv)

    if not dep:
        lw = 1 if part == 'gen' else 0
        if loss_type == 'euc':
            n.l2_loss = L.EuclideanLoss(n.generated,
                                        n.expected,
                                        name="loss",
                                        loss_weight=lw)
        else:
            n.l2_loss = L.EuclideanLoss(n.generated,
                                        n.expected,
                                        name="loss",
                                        loss_weight=0)
            n.cross_entropy_loss = L.SigmoidCrossEntropyLoss(n.topconv,
                                                             n.expected,
                                                             name="loss",
                                                             loss_weight=lw)

        if descr:
            if part != 'gen':
                n.desc_inp = L.Concat(n.generated, n.data_ref,
                                      concat_param=dict(axis=0))
                cinp = "desc_inp"
            else:
                cinp = "generated"

            n.d_conv1 = L.Convolution(n[cinp],
                                      convolution_param=conv_param(5, 32),
                                      param=dsc_conv_lr)
            n.d_pool1 = L.Pooling(n.d_conv1,
                                  pooling_param=dict(kernel_size=3,
                                                     stride=2,
                                                     pool=P.Pooling.MAX))
            n.d_pool1 = L.ReLU(n.d_pool1)

            n.d_conv2 = L.Convolution(n.d_pool1,
                                      convolution_param=conv_param(5, 32),
                                      param=dsc_conv_lr)
            n.d_pool2 = L.Pooling(n.d_conv2,
                                  pooling_param=dict(kernel_size=3,
                                                     stride=2,
                                                     pool=P.Pooling.MAX))
            n.d_pool2 = L.ReLU(n.d_pool2)

            n.d_conv3 = L.Convolution(n.d_pool2,
                                      convolution_param=conv_param(5, 64),
                                      param=dsc_conv_lr)
            n.d_pool3 = L.Pooling(n.d_conv3,
                                  pooling_param=dict(kernel_size=3,
                                                     stride=2,
                                                     pool=P.Pooling.MAX))
            n.d_pool3 = L.ReLU(n.d_pool3)

            n.d_conv4 = L.Convolution(n.d_pool3,
                                      convolution_param=conv_param(3, 64),
                                      param=dsc_conv_lr)
            n.d_pool4 = L.Pooling(n.d_conv4,
                                  pooling_param=dict(kernel_size=3,
                                                     stride=2,
                                                     pool=P.Pooling.MAX))
            n.d_pool4 = L.ReLU(n.d_pool4)

            n.d_ip1 = L.InnerProduct(n.d_pool4,
                                     param=dsc_conv_lr,
                                     inner_product_param=ip_param(512))
            n.d_ip1 = L.ReLU(n.d_ip1)
            n.d_ip2 = L.InnerProduct(n.d_ip1,
                                     param=dsc_conv_lr,
                                     inner_product_param=ip_param(1))

            n.sigmoid_loss = L.SigmoidCrossEntropyLoss(n.d_ip2,
                                                       n.label,
                                                       name="loss",
                                                       loss_weight=100)

            # Batch-averaged absolute gap between score and label.
            n.score = L.Sigmoid(n.d_ip2)
            n.lbl_flat = L.Reshape(
                n.label, reshape_param=dict(shape=dict(dim=[-1, 1])))
            n.diff = L.Eltwise(
                n.score,
                n.lbl_flat,
                eltwise_param=dict(coeff=[1.0 / batch, -1.0 / batch]))
            n.error = L.Reduction(
                n.diff, reduction_param=dict(operation=P.Reduction.ASUM))

            #n.output = L.Split(n[cinp])
            #n.output_labels = L.Split(n.score)
            #n.inputs = n.source

    return n
def net():
    """Return the proto of a net that reduces the input from axis 0.

    The input shape comes from ``data_shape`` and the reduction operation from
    ``_operation``, both defined outside this function.
    """
    spec = caffe.NetSpec()
    input_shape = dict(dim=data_shape)
    spec.data = L.Input(input_param=dict(shape=input_shape))
    reduce_kwargs = dict(axis=0, coeff=1, operation=_operation)
    spec.dataout = L.Reduction(spec.data, **reduce_kwargs)
    return spec.to_proto()
def dot_product_distance(self, vec1, vec2, axis=1):
    """Return ``1 - sum(vec1 * vec2)`` reduced from ``axis`` onward.

    Args:
        vec1: First blob.
        vec2: Second blob, multiplied element-wise with ``vec1``.
        axis: First axis handed to the Reduction layer.

    Returns:
        The top of the final Power layer.
    """
    # Eltwise operation 0 is PROD.
    product = L.Eltwise(vec1, vec2, operation=0)
    # Reduction uses its default operation (SUM).
    dot = L.Reduction(product, axis=axis)
    # Power computes shift + scale * x, i.e. 1 - dot.
    return L.Power(dot, scale=-1, shift=1)
def euclidean_distance(self, vec1, vec2, axis=1):
    """Return the squared Euclidean distance ``sum((vec1 - vec2) ** 2)``.

    No square root is taken.

    Args:
        vec1: First blob.
        vec2: Blob subtracted from ``vec1``.
        axis: First axis handed to the Reduction layer.

    Returns:
        The top of the Reduction layer.
    """
    minus_vec2 = L.Power(vec2, scale=-1)
    # Eltwise operation 1 is SUM, so this is vec1 - vec2.
    delta = L.Eltwise(vec1, minus_vec2, operation=1)
    delta_sq = L.Power(delta, power=2)
    # Reduction uses its default operation (SUM).
    return L.Reduction(delta_sq, axis=axis)