def _graph_dict():
    # This function creates a graph that has no real meaning other than
    # providing something to traverse.
    d = {}

    d['i1'] = C.sequence.input_variable(shape=(2, 3), sequence_axis=Axis('ia'), name='i1')
    d['c1'] = C.constant(shape=(2, 3), value=6, name='c1')
    d['p1'] = C.parameter(shape=(3, 2), init=7, name='p1')

    d['op1'] = C.plus(d['i1'], d['c1'], name='op1')
    d['op2'] = C.times(d['op1'], d['p1'], name='op2')

    #d['slice'] = slice(d['c1'], Axis.default_dynamic_axis(), 0, 3)
    #label_sentence_start = sequence.first(raw_labels)

    # no name
    d['p2'] = C.parameter(shape=(2, 2))

    # duplicate names
    d['op3a'] = C.plus(d['op2'], d['p2'], name='op3')
    d['op3b'] = C.plus(d['op3a'], d['p2'], name='op3')

    d['first'] = C.sequence.first(d['op3b'], name='past')

    d['root'] = d['first']

    return d
def multiFunc(self, arg1):
    # load or create the inputs we need
    multiIn = C.input(shape=arg1.shape, dynamic_axes=arg1.dynamic_axes)
    bit_map = C.constant(self.bit_map)
    max_bits = self.bit_map.max()
    shape = multiIn.shape
    reformed = C.reshape(multiIn, (-1,))
    # let's compute the means we need
    # carry_over represents the remaining value that needs to be binarized. For a single bit, this is just the input.
    # For more bits, it is the difference between the previous bits' approximation and the true value.
    carry_over = multiIn
    approx = C.element_times(multiIn, 0)
    # iterate through the maximum number of bits specified by the bit map, basically computing each level of binarization
    for i in range(max_bits):
        # determine which values of the input should be binarized to i bits or more
        hot_vals = C.greater(bit_map, i)
        # select only the values which we need to binarize
        valid_vals = C.element_select(hot_vals, carry_over, 0)
        # compute mean on a per-kernel basis; reshaping is done to allow for sum reduction along only axis 0 (the kernels)
        mean = C.element_divide(C.reduce_sum(C.reshape(C.abs(valid_vals), (valid_vals.shape[0], -1)), axis=1),
                                C.reduce_sum(C.reshape(hot_vals, (hot_vals.shape[0], -1)), axis=1))
        # reshape the mean to match the dimensionality of the input
        mean = C.reshape(mean, (mean.shape[0], mean.shape[1], 1, 1))
        # binarize the carry over
        bits = C.greater(carry_over, 0)
        bits = C.element_select(bits, bits, -1)
        bits = C.element_select(hot_vals, bits, 0)
        # add in the equivalent binary representation to the approximation
        approx = C.plus(approx, C.element_times(mean, bits))
        # compute the new carry over
        carry_over = C.plus(C.element_times(C.element_times(-1, bits), mean), carry_over)

    return approx, multiIn
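# A minimal NumPy sketch of the multi-bit approximation loop used in multiFunc above,
# assuming made-up weights `w` and per-weight bit budgets `bit_map` (illustrative values,
# not taken from the original code). Each pass binarizes the residual to +/-1 and scales
# it by the mean absolute value of the entries still being approximated.
import numpy as np

w = np.array([0.9, -0.4, 0.2, -0.7])      # made-up weights
bit_map = np.array([2, 2, 1, 1])          # made-up per-weight bit budget

approx = np.zeros_like(w)
carry_over = w.copy()
for i in range(bit_map.max()):
    hot = bit_map > i                      # weights that still get another bit
    valid = np.where(hot, carry_over, 0.0)
    mean = np.abs(valid).sum() / hot.sum() # scale for this bit level
    bits = np.where(carry_over > 0, 1.0, -1.0)
    bits = np.where(hot, bits, 0.0)
    approx += mean * bits                  # add this bit's contribution
    carry_over -= mean * bits              # residual left for the next bit

print(approx)                              # multi-bit binary approximation of w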
def squash(input, axis=-1, epsilon=1e-7):
    # ||Sj||^2
    Sj_squared_norm = ct.reduce_sum(ct.square(input), axis=axis)

    # ||Sj||^2 / (1 + ||Sj||^2) * (Sj / ||Sj||)
    factor = ct.element_divide(
        ct.element_divide(Sj_squared_norm, ct.plus(1, Sj_squared_norm)),
        ct.sqrt(ct.plus(Sj_squared_norm, epsilon)))

    return factor * input
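# For reference, a small NumPy version of the same squash nonlinearity,
# v = ||s||^2 / (1 + ||s||^2) * s / ||s||. The vector `s` and the epsilon
# default here are illustrative assumptions, not values from the original code.
import numpy as np

def squash_np(s, axis=-1, epsilon=1e-7):
    # ||s||^2 along the capsule axis, kept for broadcasting
    sq_norm = np.sum(np.square(s), axis=axis, keepdims=True)
    factor = sq_norm / (1.0 + sq_norm) / np.sqrt(sq_norm + epsilon)
    return factor * s

s = np.array([[3.0, 4.0]])       # made-up capsule output, norm 5
v = squash_np(s)
print(np.linalg.norm(v))         # ~0.96: long vectors are squashed toward length 1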
def test_Add(tmpdir):
    shape = (4, 5)
    data1 = np.random.rand(*shape).astype(np.float32)
    data2 = np.random.rand(*shape).astype(np.float32)
    model = C.plus(data1, data2)
    verify_no_input(model, tmpdir, 'Add_0')

    x = C.input_variable(shape)
    model = C.plus(x, data2)
    verify_one_input(model, data1, tmpdir, 'Add_1')

    y = C.input_variable(shape)
    model = C.plus(x, y)
    verify_two_input(model, data1, data2, tmpdir, 'Add_2')
def test_trainer(tmpdir, no_eval_function):
    in1 = input_variable(shape=(1,))
    labels = input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (ce, errs),
                      [momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], Learner)
def manipulation(self, input, digitcaps=None):
    if digitcaps is None:
        digitcaps = self.digitcaps

    self.perturbed_digitcaps = ct.plus(digitcaps, ct.reshape(input, shape=(1, 16, 1)))
    self.manipulation_mask = Masking(is_onehot_encoded=False)(self.perturbed_digitcaps, self.length)
    self.manipulation_model = self.decoder(self.manipulation_mask)

    return self.manipulation_model
def create_fast_rcnn_predictor(conv_out, rois, fc_layers):
    # RCNN
    roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING, (roi_dim, roi_dim), spatial_scale=1/16.0)
    fc_out = fc_layers(roi_out)

    # prediction head
    W_pred = parameter(shape=(4096, globalvars['num_classes']), init=normal(scale=0.01), name="cls_score.W")
    b_pred = parameter(shape=globalvars['num_classes'], init=0, name="cls_score.b")
    cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')

    # regression head
    W_regr = parameter(shape=(4096, globalvars['num_classes']*4), init=normal(scale=0.001), name="bbox_regr.W")
    b_regr = parameter(shape=globalvars['num_classes']*4, init=0, name="bbox_regr.b")
    bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')

    return cls_score, bbox_pred
def seqcla():
    # LSTM params
    input_dim = 50
    output_dim = 128
    cell_dim = 128

    # model
    num_labels = 5
    vocab = 2000
    embed_dim = 50

    t = C.dynamic_axis(name='t')
    # temporarily using cntk1 SparseInput because cntk2's Input() will simply allow sparse as a parameter
    features = cntk1.SparseInput(vocab, dynamicAxis=t, name='features')
    labels = C.input(num_labels, name='labels')

    train_reader = C.CNTKTextFormatReader(train_file)

    # setup embedding matrix
    embedding = C.parameter((embed_dim, vocab), learning_rate_multiplier=0.0,
                            init_from_file_path=embedding_file)

    # get the vector representing the word
    sequence = C.times(embedding, features, name='sequence')

    # add an LSTM layer
    L = lstm_layer(output_dim, cell_dim, sequence, input_dim)

    # add a softmax layer on top
    w = C.parameter((num_labels, output_dim), name='w')
    b = C.parameter((num_labels), name='b')
    z = C.plus(C.times(w, L), b, name='z')
    z.tag = "output"

    # and reconcile the shared dynamic axis
    pred = C.reconcile_dynamic_axis(z, labels, name='pred')

    ce = C.cross_entropy_with_softmax(labels, pred)
    ce.tag = "criterion"

    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=10, learning_rates_per_mb=0.1, max_epochs=3)

    with C.LocalExecutionContext('seqcla') as ctx:
        # train the model
        ctx.train(root_nodes=[ce], training_params=my_sgd,
                  input_map=train_reader.map(features, alias='x', dim=vocab, format='Sparse').map(
                      labels, alias='y', dim=num_labels, format='Dense'))

        # write out the predictions
        ctx.write(input_map=train_reader.map(features, alias='x', dim=vocab, format='Sparse').map(
            labels, alias='y', dim=num_labels, format='Dense'))

        # do some manual accuracy testing
        acc = calc_accuracy(train_file, ctx.output_filename_base)

        # and test for the same number...
        TOLERANCE_ABSOLUTE = 1E-02
        assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
def test_load_save_inputs(tmpdir):
    i1 = C.input_variable((1, 2), name='i1')
    i2 = C.input_variable((2, 1), name='i2')
    root_node = C.plus(i1, i2)
    input1 = [[[1, 2]]]
    input2 = [[[[1], [2]]]]

    result = root_node.eval({i1: input1, i2: input2})
    expected = [[[[2, 3], [3, 4]]]]
    assert np.allclose(result, expected)

    filename = str(tmpdir / 'i_plus_i_0.mod')
    root_node.save(filename)

    loaded_node = C.Function.load(filename)

    # Test specifying the input nodes by name
    loaded_result = loaded_node.eval({'i1': input1, 'i2': input2})
    assert np.allclose(loaded_result, expected)

    filename = filename + '.legacy'
    save_as_legacy_model(root_node, filename)
    loaded_node = C.Function.load(filename)
    loaded_result = loaded_node.eval({'i1': input1, 'i2': input2})
    assert np.allclose(loaded_result, expected)
def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model=None):
    print("creating eval model")
    last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME
    conv_layers = clone_model(model, [cfg["MODEL"].FEATURE_NODE_NAME], [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name],
                      ["rpn_cls_prob_reshape", "rpn_bbox_pred"], CloneMethod.freeze)
    rpn_out = rpn(conv_out)
    # we need to add the proposal layer anew to account for changing configs when buffering proposals in 4-stage training
    rpn_rois = create_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg)

    roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"],
                                ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])
    return eval_model
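# The de-normalization step above folds the training-time target normalization back into the
# network outputs (pred * stds + means, broadcast per class). A hedged NumPy sketch of the same
# arithmetic; the class count, means/stds, and predictions here are made-up illustrative values,
# whereas the real ones come from cfg.BBOX_NORMALIZE_MEANS / cfg.BBOX_NORMALIZE_STDS.
import numpy as np

num_classes = 3
bbox_normalize_means = np.array([0.0, 0.0, 0.0, 0.0] * num_classes)
bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2] * num_classes)

raw_pred = np.random.randn(5, 4 * num_classes)                      # normalized outputs for 5 ROIs
bbox_regr = raw_pred * bbox_normalize_stds + bbox_normalize_means   # same op as element_times/plus above
print(bbox_regr.shape)  # (5, 12): per-class (dx, dy, dw, dh) deltas in un-normalized units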
def create_detection_losses(cls_score, label_targets, bbox_pred, rois, bbox_targets, bbox_inside_weights, cfg):
    # The losses are normalized by the batch size
    # classification loss
    p_cls_score = placeholder()
    p_label_targets = placeholder()
    cls_loss = cross_entropy_with_softmax(p_cls_score, p_label_targets, axis=1)
    cls_normalization_factor = 1.0 / cfg.NUM_ROI_PROPOSALS
    normalized_cls_loss = reduce_sum(cls_loss) * cls_normalization_factor

    reduced_cls_loss = cntk.as_block(normalized_cls_loss,
                                     [(p_cls_score, cls_score), (p_label_targets, label_targets)],
                                     'CrossEntropyWithSoftmax', 'norm_cls_loss')

    # regression loss
    p_bbox_pred = placeholder()
    p_bbox_targets = placeholder()
    p_bbox_inside_weights = placeholder()
    bbox_loss = SmoothL1Loss(cfg.SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0)
    bbox_normalization_factor = 1.0 / cfg.NUM_ROI_PROPOSALS
    normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor

    reduced_bbox_loss = cntk.as_block(normalized_bbox_loss,
                                      [(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets),
                                       (p_bbox_inside_weights, bbox_inside_weights)],
                                      'SmoothL1Loss', 'norm_bbox_loss')

    detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses")

    return detection_losses
def create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg):
    # RCNN
    roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING,
                         (cfg["MODEL"].ROI_DIM, cfg["MODEL"].ROI_DIM), spatial_scale=1/16.0)
    fc_out = fc_layers(roi_out)

    # prediction head
    W_pred = parameter(shape=(4096, cfg["DATA"].NUM_CLASSES), init=normal(scale=0.01), name="cls_score.W")
    b_pred = parameter(shape=cfg["DATA"].NUM_CLASSES, init=0, name="cls_score.b")
    cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')

    # regression head
    W_regr = parameter(shape=(4096, cfg["DATA"].NUM_CLASSES*4), init=normal(scale=0.001), name="bbox_regr.W")
    b_regr = parameter(shape=cfg["DATA"].NUM_CLASSES*4, init=0, name="bbox_regr.b")
    bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')

    return cls_score, bbox_pred
def create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights):
    # classification loss
    cls_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1)

    p_cls_loss = placeholder()
    p_rois = placeholder()
    # The terms that are accounted for in the cls loss are those that correspond to an actual roi proposal
    # --> do not count no-op (all-zero) rois
    roi_indicator = reduce_sum(p_rois, axis=1)
    cls_num_terms = reduce_sum(cntk.greater_equal(roi_indicator, 0.0))
    cls_normalization_factor = 1.0 / cls_num_terms
    normalized_cls_loss = reduce_sum(p_cls_loss) * cls_normalization_factor

    reduced_cls_loss = cntk.as_block(normalized_cls_loss,
                                     [(p_cls_loss, cls_loss), (p_rois, rois)],
                                     'Normalize', 'norm_cls_loss')

    # regression loss
    p_bbox_pred = placeholder()
    p_bbox_targets = placeholder()
    p_bbox_inside_weights = placeholder()
    bbox_loss = SmoothL1Loss(cfg["CNTK"].SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0)
    # The bbox loss is normalized by the batch size
    bbox_normalization_factor = 1.0 / cfg["TRAIN"].BATCH_SIZE
    normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor

    reduced_bbox_loss = cntk.as_block(normalized_bbox_loss,
                                      [(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets),
                                       (p_bbox_inside_weights, bbox_inside_weights)],
                                      'SmoothL1Loss', 'norm_bbox_loss')

    detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses")

    return detection_losses
def test_eval_plus_one_constant_last():
    result = cntk.eval(cntk.plus([1., 2., 3., 4.], cntk.constant([1., 1., 0., 0.])))
    TOLERANCE_ABSOLUTE = 1E-06
    assert np.allclose(result, np.asarray([2., 3., 3., 4.]), atol=TOLERANCE_ABSOLUTE)
def test_trainer(tmpdir, no_eval_function):
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (ce, errs),
                        [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    external_state = {"additional external state": math.pi, "nested dict": {"a": "b"}, "list": [1, 2, 3]}
    trainer.save_checkpoint(p, external_state)
    restored_state = trainer.restore_from_checkpoint(p)
    assert external_state == restored_state

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], C.Learner)
def test_eval_plus_one_input():
    result = cntk.eval(cntk.plus(cntk.input_numpy([[1., 2., 3., 4.]]), [1., 1., 0., 0.]))
    TOLERANCE_ABSOLUTE = 1E-06
    assert np.allclose(result, np.asarray([2., 3., 3., 4.]), atol=TOLERANCE_ABSOLUTE)
def import_operation(self, cntk_op):
    """
    Recursively import and translate CNTK operations.

    Arguments:
        cntk_op: CNTK operation to be imported.

    Returns:
        Translated operation.
    """
    if self.debug:
        for _ in range(len(inspect.stack())):
            print(' ', end="")
        print("Importing: " + cntk_op.uid + "(", end="")
        for i in cntk_op.inputs:
            print(i.uid + str(i.shape) + ",", end="")
        print(")")

    inputs = []
    for i in cntk_op.inputs:
        axes = [ng.make_axis(dim) for dim in i.shape]
        dtype = np.dtype(i.dtype)

        if i.is_output:
            uid = i.owner.root_function.uid
            temp = self.uid_op_map[uid]
            if isinstance(temp, C.Function):
                temp = self.import_operation(temp)
                if temp is None:
                    raise ValueError("Error translating: " + uid)
                else:
                    if self.debug:
                        for _ in range(len(inspect.stack()) + 1):
                            print(' ', end="")
                        print("Finished importing: " + uid + str(cntk_op.shape) +
                              " -> " + temp.name + str(temp.shape.full_lengths))
                    self.uid_op_map[uid] = temp
            inputs.append(temp)
        elif i.is_input:
            if self.batch_size > 1:
                axes.append(ng.make_axis(self.batch_size, 'N'))
            temp = ng.placeholder(axes, dtype).named(i.uid)
            inputs.append(temp)
            self.placeholders.append(temp)
        else:
            try:
                input_value = i.value
            except AttributeError:
                input_value = C.plus(i, np.zeros(i.shape)).eval()
            if i.is_constant:
                inputs.append(ng.constant(input_value, axes, dtype).named(i.uid))
            elif i.is_parameter:
                inputs.append(ng.variable(axes, dtype, input_value).named(i.uid))
            else:
                raise ValueError("Unknown input: " + i.uid)

    return self.ops_bridge(cntk_op, inputs)
def test_Add(tmpdir):
    pytest.skip('Need to support new ONNX spec.')
    shape = (4, 5)
    data1 = np.random.rand(*shape).astype(np.float32)
    data2 = np.random.rand(*shape).astype(np.float32)
    model = C.plus(data1, data2)
    verify_no_input(model, tmpdir, 'Add_0')

    x = C.input_variable(shape)
    model = C.plus(x, data2)
    verify_one_input(model, data1, tmpdir, 'Add_1')

    y = C.input_variable(shape)
    model = C.plus(x, y)
    verify_two_input(model, data1, data2, tmpdir, 'Add_2')
def test_plus_3():
    cntk_op = C.plus([1, 2, 3], [[4, 5, 6], [7, 8, 9]])
    cntk_ret = cntk_op.eval()

    ng_op, _ = CNTKImporter().import_model(cntk_op)
    ng_ret = ng.transformers.make_transformer().computation(ng_op)()

    assert np.array_equal(cntk_ret, ng_ret)
def test_Add(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        shape = (4, 5)
        data1 = np.random.rand(*shape).astype(dtype)
        data2 = np.random.rand(*shape).astype(dtype)
        model = C.plus(data1, data2)
        verify_no_input(model, tmpdir, 'Add_0')

        x = C.input_variable(shape)
        model = C.plus(x, data2)
        verify_one_input(model, data1, tmpdir, 'Add_1')

        y = C.input_variable(shape)
        model = C.plus(x, y)
        verify_two_input(model, data1, data2, tmpdir, 'Add_2')
def create_binary_convolution_model():
    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 64, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3, 3), 128, channels=64, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3, 3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (1, 1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
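# The binary regularizer above adds 5e-6 * sum(1 - w^2) over the parameters named "filter".
# For weights kept in [-1, 1] this term is non-negative and reaches zero only at +/-1,
# so it nudges the binarized filters toward +/-1. A hedged NumPy sketch of the same sum,
# using made-up filter tensors rather than the model's actual parameters:
import numpy as np

filters = [np.random.uniform(-1, 1, size=(64, 3, 3)),
           np.random.uniform(-1, 1, size=(128, 3, 3))]   # illustrative stand-ins for "filter" params

weight_sum = sum(np.sum(1.0 - np.square(p)) for p in filters)
bin_reg = 5e-6 * weight_sum   # same scale factor as in the model
print(bin_reg)                # 0 only when every weight sits exactly at +/-1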
def multiFunc(self, arg1):
    multiIn = C.input(shape=arg1.shape, dynamic_axes=arg1.dynamic_axes)
    bit_map = C.constant(self.bit_map)
    max_bits = self.bit_map.max()
    carry_over = multiIn
    approx = C.element_times(multiIn, 0)
    for i in range(max_bits):
        hot_vals = C.greater(bit_map, i)
        valid_vals = C.element_select(hot_vals, carry_over, 0)
        mean = C.element_divide(C.reduce_sum(C.abs(valid_vals)), C.reduce_sum(hot_vals))
        bits = C.greater(carry_over, 0)
        bits = C.element_select(bits, bits, -1)
        bits = C.element_select(hot_vals, bits, 0)
        approx = C.plus(approx, C.element_times(mean, bits))
        carry_over = C.plus(C.element_times(C.element_times(-1, bits), mean), carry_over)

    return approx, multiIn
def test_clone_with_unfound_previous_node():
    x = C.input_variable(())
    y = C.combine(x * x, x + x)
    y0 = y[0]
    y1 = y[1]
    y0_new = C.plus(y0, 0, name="test")
    X = C.logging.find_by_name(y0_new, 'QueryReply_y')
    with pytest.raises(AttributeError):
        y_clone = y.clone(C.CloneMethod.share, {X: y0_new})
def create_binary_convolution_model():
    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3, 3), 128, channels=32, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3, 3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (1, 1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
def test_exception_for_unnamed_arguments():
    i1 = C.input_variable((1, 2), name='i1')
    i2 = C.input_variable((2, 1), name='i2')
    root_node = C.plus(i1, i2)
    input1 = [[[1, 2]]]
    input2 = [[[[1], [2]]]]

    with pytest.raises(Exception):
        # not allowed, since plus has more than 1 input
        result = root_node.eval([input1, input2])
def multiFunc(self, arg1):
    multiIn = C.input(shape=arg1.shape, dynamic_axes=arg1.dynamic_axes)
    bit_map = C.constant(self.bit_map)
    max_bits = self.bit_map.max()
    shape = multiIn.shape
    reformed = C.reshape(multiIn, (-1,))
    carry_over = multiIn
    approx = C.element_times(multiIn, 0)
    for i in range(max_bits):
        hot_vals = C.greater(bit_map, i)
        valid_vals = C.element_select(hot_vals, carry_over, 0)
        mean = C.element_divide(C.reduce_sum(C.abs(valid_vals)), C.reduce_sum(hot_vals))
        bits = C.greater(carry_over, 0)
        bits = C.element_select(bits, bits, -1)
        bits = C.element_select(hot_vals, bits, 0)
        approx = C.plus(approx, C.element_times(mean, bits))
        carry_over = C.plus(C.element_times(C.element_times(-1, bits), mean), carry_over)

    return approx, multiIn
def test_clone_with_wrong_type_node():
    x = C.input_variable(())
    y = C.combine(x * x, x + x)
    y0 = y[0]
    y1 = y[1]
    y0_new = C.plus(y0, 0, name="test")
    X = C.logging.find_by_name(y0_new, 'QueryReply_y')
    a = 5
    with pytest.raises(TypeError):
        y_clone = y.clone(C.CloneMethod.share, {y0: a})
def test_replace_placeholders():
    p = C.placeholder(shape=(1,))
    i = C.input_variable(shape=(1,), needs_gradient=True, name='i')
    res = p + 3
    res.replace_placeholders({p: i})

    assert res.eval({i: [[3]]}) == [6]

    func = C.plus(i, 10)
    res2 = p + 3
    res2.replace_placeholders({p: func.output})

    assert res2.eval({i: [[3]]}) == [16]

    func = C.plus(i, 11)
    res3 = p + 3
    res3.replace_placeholders({p: func})

    assert res3.eval({i: [[3]]}) == [17]
def create_sample_model(device, writer=None,
                        lr_per_sample=C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])):
    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10, device=device)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    learner = C.sgd(z.parameters, lr_per_sample)
    trainer = C.Trainer(z, (ce, errs), [learner], writer)
    return (trainer, in1, labels)
def _simple_dict():
    d = {}

    d['i1'] = C.input_variable(shape=(2, 3), name='i1')
    d['c1'] = C.constant(shape=(2, 3), value=6, name='c1')
    d['p1'] = C.parameter(shape=(3, 2), init=7, name='p1')
    d['op1'] = C.plus(d['i1'], d['c1'], name='op1')
    d['op2'] = C.times(d['op1'], d['p1'], name='op2')
    d['root'] = d['op2']

    d['target'] = C.input_variable((), name='label')
    d['all'] = C.combine([d['root'], C.minus(
        d['target'], C.constant(1, name='c2'), name='minus')], name='all')

    return d
def test_as_composite():
    input_dim = 1
    proj_dim = 2
    x = C.input_variable((input_dim,))
    b = C.parameter((proj_dim))
    w = C.parameter((input_dim, proj_dim))
    func_name = 't_plus_b'
    t_plus_b = C.plus(C.times(x, w), b, name=func_name)

    assert(t_plus_b.root_function.name == func_name)

    composite = C.as_composite(t_plus_b.root_function)
    assert(composite.root_function.name == func_name)
    composite = C.as_composite(composite)
    assert(composite.root_function.name == func_name)
    composite = C.as_composite(t_plus_b)
    assert(composite.root_function.name == func_name)
def test_output_to_retain():
    in1 = input_variable(shape=(1,))
    labels = input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (ce, errs),
                      [momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[[1]], [[2]]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    assert np.allclose(var_map[z_output], np.asarray(in1_value) + 20)
def test_set_name():
    x = C.input_variable((1,))
    y = C.input_variable((1,))
    x_plus_y = x + y
    assert (x_plus_y.name == '')
    x_plus_y.name = 'x_plus_y'
    assert (x_plus_y.name == 'x_plus_y')

    x_plus_y_2 = C.plus(x, y, name='x_plus_y_2')
    assert (x_plus_y_2.name == 'x_plus_y_2')
    with pytest.raises(ValueError):
        x_plus_y_2.name = 'x_plus_y_2_new'

    from ... import cntk_py
    cntk_py.allow_renaming_functions()

    x_plus_y_2.name = 'x_plus_y_2_new'
def SmoothL1Loss(sigma, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """
    From https://github.com/smallcorgi/Faster-RCNN_TF/blob/master/lib/fast_rcnn/train.py

    ResultLoss = outside_weights * SmoothL1(inside_weights * (bbox_pred - bbox_targets))
    SmoothL1(x) = 0.5 * (sigma * x)^2,    if |x| < 1 / sigma^2
                  |x| - 0.5 / sigma^2,    otherwise
    """
    sigma2 = sigma * sigma

    inside_mul_abs = C.abs(C.element_times(bbox_inside_weights, C.minus(bbox_pred, bbox_targets)))

    smooth_l1_sign = C.less(inside_mul_abs, 1.0 / sigma2)
    smooth_l1_option1 = C.element_times(C.element_times(inside_mul_abs, inside_mul_abs), 0.5 * sigma2)
    smooth_l1_option2 = C.minus(inside_mul_abs, 0.5 / sigma2)
    smooth_l1_result = C.plus(C.element_times(smooth_l1_option1, smooth_l1_sign),
                              C.element_times(smooth_l1_option2, C.minus(1.0, smooth_l1_sign)))

    return C.element_times(bbox_outside_weights, smooth_l1_result)
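# The docstring formula maps one-to-one onto the element-wise graph ops above
# (less/element_times/minus select between the two branches). A NumPy reference of the
# same piecewise function; the sample predictions and targets are illustrative only.
import numpy as np

def smooth_l1_np(sigma, bbox_pred, bbox_targets, inside_w, outside_w):
    sigma2 = sigma * sigma
    x = np.abs(inside_w * (bbox_pred - bbox_targets))
    # 0.5 * (sigma * x)^2 where |x| < 1/sigma^2, |x| - 0.5/sigma^2 otherwise
    result = np.where(x < 1.0 / sigma2, 0.5 * sigma2 * x * x, x - 0.5 / sigma2)
    return outside_w * result

pred = np.array([0.1, 2.0])       # made-up regression outputs
target = np.array([0.0, 0.0])
print(smooth_l1_np(1.0, pred, target, 1.0, 1.0))  # [0.005, 1.5]: quadratic near 0, linear in the tail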
def create_fast_rcnn_eval_model(model, image_input, roi_proposals, cfg):
    print("creating eval model")
    predictor = clone_model(model, [cfg["MODEL"].FEATURE_NODE_NAME, "roi_proposals"],
                            ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = predictor(image_input, roi_proposals)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, bbox_regr])

    if cfg["CNTK"].DEBUG_OUTPUT:
        plot(eval_model, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_eval." + cfg["CNTK"].GRAPH_TYPE))

    return eval_model
def run_distributed_training(tmpdir, create_func):
    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True

    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [dist_learner])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
def test_input_order():
    input_dim = 1
    proj_dim = 2
    x = C.input_variable((input_dim,), name='x')
    b = C.parameter((proj_dim), name='b')
    w = C.parameter((input_dim, proj_dim), name='w')
    func_name = 't_plus_b'
    t = C.times(x, w)
    t_plus_b = C.plus(t, b, name=func_name)

    def compare_var_names(vars, names):
        num_vars = len(vars)
        for i in range(num_vars):
            if (vars[i].name != names[i]):
                return False
        return True

    assert compare_var_names(t.root_function.inputs, ['x', 'w'])
    assert compare_var_names(t.inputs, ['x', 'w'])
    assert compare_var_names(t_plus_b.inputs, ['x', 'w', 'b'])
def test_combine_duplicated_inputs():
    input_dim = 1
    proj_dim = 2
    x = C.input_variable((input_dim,), name='x')
    b = C.parameter((proj_dim), name='b')
    w = C.parameter((input_dim, proj_dim), name='w')
    func_name = 't_plus_b'
    t = C.times(x, w)
    t_plus_b = C.plus(t, b, name=func_name)

    duplicated_t_plus_b = C.combine([t_plus_b, t_plus_b])

    def compare_var_names(vars, names):
        num_vars = len(vars)
        for i in range(num_vars):
            if (vars[i].name != names[i]):
                return False
        return True

    assert compare_var_names(duplicated_t_plus_b.outputs, [func_name, func_name])
def plus(left, right, name=''):
    '''
    The output of this operation is the sum of the two input tensors. It supports broadcasting.
    In case of scalars its backward pass propagates the received gradient.
    The operator (+) has been overloaded and can equally be used instead of plus().

    Example:
        >>> C.eval(C.plus([1, 2, 3], [4, 5, 6]))
        [array([[ 5.,  7.,  9.]])]

        >>> C.eval(C.plus([-5, -4, -3, -2, -1], [10]))
        [array([[ 5.,  6.,  7.,  8.,  9.]])]

    Args:
        left: left side tensor
        right: right side tensor
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    from cntk import plus
    left = sanitize_input(left, get_data_type(right))
    right = sanitize_input(right, get_data_type(left))
    return plus(left, right, name).output()
def create_eval_model(model, image_input, dims_input, rpn_model=None):
    print("creating eval model")
    conv_layers = clone_model(model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name, "dims_input"], ["rpn_rois"], CloneMethod.freeze)
    rpn_rois = rpn(conv_out, dims_input)

    roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"],
                                ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg["TRAIN"].BBOX_NORMALIZE_TARGETS and cfg["TRAIN"].BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg["TRAIN"].BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg["TRAIN"].BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])
    return eval_model