def __call__(self, num_classes=10, act_type=relu, mdl_conv1a_nf=40, mdl_conv1b_nf=60,
             mdl_conv2a_nf=50, mdl_conv2b_nf=75, mdl_fc1_nh=75,
             mdl_drop2a_p=0.033, mdl_drop2b_p=0.097, mdl_drop3_p=0.412, **kwargs):
    input_var = input_variable((1, self.img_h, self.img_w), np.float32)
    label_var = input_variable((self.n_dim), np.float32)

    conv1a = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv1a_nf),
                         activation=act_type, init=glorot_uniform(),
                         pad=True, name='conv1a')(input_var)
    conv1b = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv1b_nf),
                         activation=act_type, init=glorot_uniform(),
                         pad=True, name='conv1b')(conv1a)
    pool1 = MaxPooling(filter_shape=(2, 2), strides=(2, 2), name='pool1')(conv1b)

    conv2a = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv2a_nf),
                         activation=act_type, init=glorot_uniform(),
                         pad=True, name='conv2a')(pool1)
    drop2a = Dropout(prob=mdl_drop2a_p, name="drop2a")(conv2a)
    conv2b = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv2b_nf),
                         activation=act_type, init=glorot_uniform(),
                         pad=True, name='conv2b')(drop2a)
    # fixed copy-paste bug: this dropout previously reused mdl_drop2a_p and the name "drop2a"
    drop2b = Dropout(prob=mdl_drop2b_p, name="drop2b")(conv2b)
    pool2 = MaxPooling(filter_shape=(2, 2), strides=(2, 2), name='pool2')(drop2b)

    fc1 = Dense(shape=int(mdl_fc1_nh), init=glorot_uniform(),
                activation=act_type, name='fc1')(pool2)
    drop3 = Dropout(prob=mdl_drop3_p, name="drop3")(fc1)
    # fc2 = Dense(shape=num_classes, init=glorot_uniform(), activation=softmax, name='fc2')(drop3)
    fc2 = Dense(shape=num_classes, init=glorot_uniform(), activation=None, name='fc2')(drop3)

    return input_var, label_var, fc2
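# --- Usage sketch (an assumption, not from the original source): wiring the
# factory's outputs into the loss/metric/Trainer pattern used throughout this
# collection. `model_factory` is a hypothetical instance of the class defining
# __call__ above, and the 0.01 learning rate is only a placeholder.
def _train_setup_sketch(model_factory):
    input_var, label_var, fc2 = model_factory(num_classes=10)
    ce = cross_entropy_with_softmax(fc2, label_var)  # softmax is folded into the loss,
    pe = classification_error(fc2, label_var)        # hence activation=None on fc2
    lr = learning_rate_schedule(0.01, UnitType.sample)
    return Trainer(fc2, (ce, pe), sgd(fc2.parameters, lr))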
def test_trainer_with_some_params_not_learned():
    input_dim = 2
    proj_dim = 2
    x = input_variable(shape=(input_dim,))
    W = parameter(shape=(input_dim, proj_dim), init=glorot_uniform())
    B = parameter(shape=(proj_dim,), init=glorot_uniform())
    t = times(x, W)
    z = t + B

    W_orig_value = W.value
    B_orig_value = B.value

    labels = input_variable(shape=(proj_dim,))
    ce = cross_entropy_with_softmax(z, labels)
    pe = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    # Only W is handed to the learner, so B must remain unchanged by training
    trainer = Trainer(z, (ce, pe), sgd([W], lr_per_sample))

    x_value = [[1, 1], [2, 2]]
    label_value = [[0, 1], [1, 0]]
    arguments = {x: x_value, labels: label_value}

    num_iters = 3
    for i in range(num_iters):
        trainer.train_minibatch(arguments)
        assert np.array_equal(B.value, B_orig_value)
        assert not np.array_equal(W.value, W_orig_value)
        W_orig_value = W.value

    trainer.test_minibatch(arguments)
def test_model_not_criterion_subset():
    input_dim = 2
    proj_dim = 11
    model1_dim = 3
    model2_dim = 4
    x = input_variable((input_dim,))

    core = Embedding(proj_dim)
    model1 = Dense(model1_dim)(sequence.last(core(x)))
    model1_label = input_variable((model1_dim,), dynamic_axes=[Axis.default_batch_axis()])
    ce_model1 = cross_entropy_with_softmax(model1, model1_label)
    pe_model1 = classification_error(model1, model1_label)

    model2 = Dense(model2_dim)(core(x))
    model2_label = input_variable((model2_dim,))
    ce_model2 = cross_entropy_with_softmax(model2, model2_label)
    pe_model2 = classification_error(model2, model2_label)

    ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1

    lr_schedule = learning_rate_schedule(0.003, UnitType.sample)
    trainer_multitask = Trainer(model1, (ce, pe_model1), sgd(ce.parameters, lr=lr_schedule))

    x_data = np.asarray([[2., 1.], [1., 2.]], np.float32)
    model1_label_data = np.asarray([1., 0., 0.], np.float32)
    model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32)
    trainer_multitask.train_minibatch({x: [x_data],
                                       model1_label: [model1_label_data],
                                       model2_label: [model2_label_data]})
def test_trainer(tmpdir, no_eval_function):
    in1 = input_variable(shape=(1,))
    labels = input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (ce, errs),
                      [momentum_sgd(z.parameters, lr_per_sample,
                                    momentum_time_constant, True)])

    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, [z_output])

    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], Learner)
def create_resnet_network(network_name):
    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
    else:
        # fixed: this previously returned the exception instead of raising it
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    return {
        'name': network_name,
        'feature': input_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'output': z
    }
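# --- Usage sketch (an assumption, not from the original source): consuming the
# dict returned by create_resnet_network with the Trainer/learner pattern used
# throughout this collection. The 0.01 per-sample learning rate is a placeholder.
def make_resnet_trainer_sketch(network_name):
    network = create_resnet_network(network_name)
    lr_schedule = learning_rate_schedule(0.01, UnitType.sample)
    learner = sgd(network['output'].parameters, lr=lr_schedule)
    return Trainer(network['output'], (network['ce'], network['pe']), learner)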
def test_proposal_layer():
    cls_prob_shape_cntk = (18, 61, 61)
    cls_prob_shape_caffe = (18, 61, 61)
    rpn_bbox_shape = (36, 61, 61)
    im_info = [1000, 1000, 1]

    # Create input tensors with values
    cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32)
    rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32)

    # Create CNTK layer and call forward
    cls_prob_var = input_variable(cls_prob_shape_cntk)
    rpn_bbox_var = input_variable(rpn_bbox_shape)

    cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var,
                                                 cntk.constant(im_info, (3,))))
    state, cntk_output = cntk_layer.forward({cls_prob_var: [cls_prob],
                                             rpn_bbox_var: [rpn_bbox_pred]})
    cntk_proposals = cntk_output[next(iter(cntk_output))][0]

    # Create Caffe layer and call forward
    cls_prob_caffe = cls_prob.reshape(cls_prob_shape_caffe)
    bottom = [np.array([cls_prob_caffe]), np.array([rpn_bbox_pred]), np.array([im_info])]
    top = None  # handled through return statement in caffe layer for unit testing
    param_str = "'feat_stride': 16"

    caffe_layer = CaffeProposalLayer()
    caffe_layer.set_param_str(param_str)
    caffe_layer.setup(bottom, top)
    caffe_output = caffe_layer.forward(bottom, top)
    caffe_proposals = caffe_output[:, 1:]

    # assert that results are exactly the same
    assert cntk_proposals.shape == caffe_proposals.shape
    assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
    print("Verified ProposalLayer")
def create_bn_inception():
    # Input variables denoting the features and label data
    feature_var = input_variable((NUM_CHANNELS, IMAGE_HEIGHT, IMAGE_WIDTH))
    label_var = input_variable((NUM_CLASSES))

    bn_time_const = 4096
    z = bn_inception_cifar_model(feature_var, NUM_CLASSES, bn_time_const)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z
    }
def _setup_test_model(self):
    inputs = input_variable(shape=(1,), dtype=np.float32)
    outputs = input_variable(shape=(1,), dtype=np.float32)

    q = Dense(1, activation=None)(inputs)
    loss = squared_error(q, outputs)

    return {'inputs': inputs, 'outputs': outputs, 'f': q, 'loss': loss}
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes,
                           dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features, num_output_classes,
                                                    embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim, True, 'x'),
        StreamConfiguration(labels_stream_name, num_output_classes, False, 'y')
    ], 0)

    features_si = mb_source.stream_info(features)
    labels_si = mb_source.stream_info(label)

    # Instantiate the trainer object to drive the model training
    # (fixed a stuttered "lr = lr =" assignment here)
    lr = learning_rates_per_sample(0.0005)
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {features: mb[features_si].m_data,
                     label: mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        i += 1
def test_anchor_target_layer():
    from utils.rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer
    from utils.caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer

    rpn_cls_score_shape_cntk = (1, 18, 61, 61)
    num_gt_boxes = 50
    gt_boxes_shape_cntk = (num_gt_boxes, 5)
    dims_info_shape = (6,)
    im_info = [1000, 1000, 1]

    # Create input tensors with values
    rpn_cls_score_dummy = np.random.random_sample(rpn_cls_score_shape_cntk).astype(np.float32)
    dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32)

    x1y1 = np.random.random_sample((num_gt_boxes, 2)) * 500
    wh = np.random.random_sample((num_gt_boxes, 2)) * 400
    x2y2 = x1y1 + wh + 50
    label = np.random.random_sample((num_gt_boxes, 1))
    label = (label * 17.0)
    gt_boxes = np.hstack((x1y1, x2y2, label)).astype(np.float32)

    # Create CNTK layer and call forward
    rpn_cls_score_var = input_variable(rpn_cls_score_shape_cntk)
    gt_boxes_var = input_variable(gt_boxes_shape_cntk)
    dims_info_var = input_variable(dims_info_shape)

    cntk_layer = user_function(CntkAnchorTargetLayer(
        rpn_cls_score_var, gt_boxes_var, dims_info_var, deterministic=True))
    state, cntk_output = cntk_layer.forward({rpn_cls_score_var: [rpn_cls_score_dummy],
                                             gt_boxes_var: [gt_boxes],
                                             dims_info_var: dims_input})

    obj_key = [k for k in cntk_output if 'objectness_target' in str(k)][0]
    bbt_key = [k for k in cntk_output if 'rpn_bbox_target' in str(k)][0]
    bbw_key = [k for k in cntk_output if 'rpn_bbox_inside_w' in str(k)][0]
    cntk_objectness_target = cntk_output[obj_key][0]
    cntk_bbox_targets = cntk_output[bbt_key][0]
    cntk_bbox_inside_w = cntk_output[bbw_key][0]

    # Create Caffe layer and call forward
    bottom = [np.array(rpn_cls_score_dummy), np.array(gt_boxes), np.array(im_info)]
    top = None  # handled through return statement in caffe layer for unit testing
    param_str = "'feat_stride': 16"

    caffe_layer = CaffeAnchorTargetLayer()
    caffe_layer.set_param_str(param_str)
    caffe_layer.setup(bottom, top)
    caffe_layer.set_deterministic_mode()
    caffe_objectness_target, caffe_bbox_targets, caffe_bbox_inside_w = caffe_layer.forward(bottom, top)

    # assert that results are exactly the same
    assert cntk_objectness_target.shape == caffe_objectness_target.shape
    assert cntk_bbox_targets.shape == caffe_bbox_targets.shape
    assert cntk_bbox_inside_w.shape == caffe_bbox_inside_w.shape
    assert np.allclose(cntk_objectness_target, caffe_objectness_target, rtol=0.0, atol=0.0)
    assert np.allclose(cntk_bbox_targets, caffe_bbox_targets, rtol=0.0, atol=0.0)
    assert np.allclose(cntk_bbox_inside_w, caffe_bbox_inside_w, rtol=0.0, atol=0.0)

    print("Verified AnchorTargetLayer")
def test_huber_loss(self):
    i1 = input_variable((2))
    i2 = input_variable((2))
    np.testing.assert_array_equal(
        huber_loss(i1, i2).eval({i1: [[2, 1], [1, 5]], i2: [[4, 1], [1, 4]]}),
        [1.5, 0.5])
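# Worked check of the expected values above (an informal note, not from the source):
# Huber loss with delta = 1 sums per-element losses over each row, where the
# per-element loss is 0.5*d**2 for |d| <= 1 and |d| - 0.5 otherwise.
#   row 1: diffs (2-4, 1-1) = (-2, 0)  ->  (2 - 0.5) + 0       = 1.5
#   row 2: diffs (1-1, 5-4) = (0, 1)   ->  0 + 0.5 * 1**2      = 0.5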
def create_inputs(vocab_dim):
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    input_sequence = input_variable(shape=vocab_dim, dynamic_axes=input_dynamic_axes)
    label_sequence = input_variable(shape=vocab_dim, dynamic_axes=input_dynamic_axes)

    return input_sequence, label_sequence
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes,
                           dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features, num_output_classes,
                                                    embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample))

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        print_training_progress(trainer, i, training_progress_output_freq)

    import copy
    evaluation_average = copy.copy(trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
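# --- Sketch of the `create_reader` helper used above (an assumption: the real
# helper is defined elsewhere in the example and may differ). It follows the CTF
# reader pattern from test_eval_sparse_dense further below, with the 'x'/'y'
# stream aliases that the legacy StreamConfiguration variant above uses for the
# same Train.ctf file.
def create_reader_sketch(path, is_training, input_dim, label_dim):
    return MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='x', shape=input_dim, is_sparse=True),
        labels=StreamDef(field='y', shape=label_dim, is_sparse=False)
    )), randomize=is_training)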
def test_proposal_layer():
    from utils.rpn.proposal_layer import ProposalLayer as CntkProposalLayer
    from utils.caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer
    from FasterRCNN.FasterRCNN_config import cfg

    cls_prob_shape_cntk = (18, 61, 61)
    cls_prob_shape_caffe = (18, 61, 61)
    rpn_bbox_shape = (36, 61, 61)
    dims_info_shape = (6,)
    im_info = [1000, 1000, 1]

    # Create input tensors with values
    cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32)
    rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32)
    dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32)

    # Create CNTK layer and call forward
    cls_prob_var = input_variable(cls_prob_shape_cntk)
    rpn_bbox_var = input_variable(rpn_bbox_shape)
    dims_info_var = input_variable(dims_info_shape)

    layer_config = {}
    layer_config["feat_stride"] = 16
    layer_config["scales"] = [8, 16, 32]

    layer_config["train_pre_nms_topN"] = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
    layer_config["train_post_nms_topN"] = cfg["TRAIN"].RPN_POST_NMS_TOP_N
    layer_config["train_nms_thresh"] = float(cfg["TRAIN"].RPN_NMS_THRESH)
    layer_config["train_min_size"] = float(cfg["TRAIN"].RPN_MIN_SIZE)

    layer_config["test_pre_nms_topN"] = cfg["TEST"].RPN_PRE_NMS_TOP_N
    layer_config["test_post_nms_topN"] = cfg["TEST"].RPN_POST_NMS_TOP_N
    layer_config["test_nms_thresh"] = float(cfg["TEST"].RPN_NMS_THRESH)
    layer_config["test_min_size"] = float(cfg["TEST"].RPN_MIN_SIZE)

    cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var,
                                                 dims_info_var, layer_config))
    state, cntk_output = cntk_layer.forward({cls_prob_var: [cls_prob],
                                             rpn_bbox_var: [rpn_bbox_pred],
                                             dims_info_var: dims_input})
    cntk_proposals = cntk_output[next(iter(cntk_output))][0]

    # Create Caffe layer and call forward
    cls_prob_caffe = cls_prob.reshape(cls_prob_shape_caffe)
    bottom = [np.array([cls_prob_caffe]), np.array([rpn_bbox_pred]), np.array([im_info])]
    top = None  # handled through return statement in caffe layer for unit testing
    param_str = "'feat_stride': 16"

    caffe_layer = CaffeProposalLayer()
    caffe_layer.set_param_str(param_str)
    caffe_layer.setup(bottom, top)
    caffe_output = caffe_layer.forward(bottom, top)
    caffe_proposals = caffe_output[:, 1:]

    # assert that results are exactly the same
    assert cntk_proposals.shape == caffe_proposals.shape
    assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
    print("Verified ProposalLayer")
def train_fast_rcnn(debug_output=False):
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = input_variable((num_channels, image_height, image_width))
    roi_input = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        roi_input: minibatch_source[roi_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

        progress_printer.epoch_summary(with_metric=True)
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch + 1)))

    return frcn_output
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    input = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant((), 0.00390625), input)
    netout = fully_connected_classifier_net(scaled_input, num_output_classes,
                                            hidden_layers_dim, num_hidden_layers,
                                            sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    rel_path = r"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim),
        StreamConfiguration(labels_stream_name, num_output_classes)
    ])
    features_si = mb_source.stream_info(feature_stream_name)
    labels_si = mb_source.stream_info(labels_stream_name)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.003125)
    trainer = Trainer(netout, ce, pe, [sgd_learner(netout.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    minibatch_size = 32
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 1
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 20

    for i in range(0, int(num_minibatches_to_train)):
        mb = mb_source.get_next_minibatch(minibatch_size)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {input: mb[features_si].m_data,
                     label: mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
def train_model(debug_output=False):
    # Create the minibatch source
    minibatch_source = create_reader(map_file)

    # Input variables denoting the features and label data
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable((num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels
    }

    # Instantiate the classification model and loss function
    # (comments here previously referred to Fast R-CNN and rois, a copy-paste leftover)
    model = modify_model(image_input, num_classes)
    ce = cross_entropy_with_softmax(model, label_input)
    pe = classification_error(model, label_input)

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    momentum_time_constant = 10
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    learner = momentum_sgd(model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(model, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training image classifier for %s epochs." % max_epochs)
    log_number_of_parameters(model)

    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        model.save(os.path.join(output_model_folder, 'withcrops_{}.dnn'.format(epoch + 1)))

    return
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    input = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant((), 0.00390625), input)
    netout = fully_connected_classifier_net(scaled_input, num_output_classes,
                                            hidden_layers_dim, num_hidden_layers,
                                            sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    rel_path = os.path.join(*"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    if not os.path.exists(path):
        readme_file = os.path.normpath(os.path.join(os.path.dirname(path), "..", "README.md"))
        raise RuntimeError("File '%s' does not exist. Please follow the instructions at %s to download and prepare it." % (path, readme_file))

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim),
        StreamConfiguration(labels_stream_name, num_output_classes)
    ])
    features_si = mb_source.stream_info(feature_stream_name)
    labels_si = mb_source.stream_info(labels_stream_name)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.003125)
    trainer = Trainer(netout, ce, pe, [sgd_learner(netout.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    minibatch_size = 32
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 1
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 20

    for i in range(0, int(num_minibatches_to_train)):
        mb = mb_source.get_next_minibatch(minibatch_size)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {input: mb[features_si].m_data,
                     label: mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
def ffnet(debug_output=False):
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    input = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Instantiate the feedforward classification model
    netout = fully_connected_classifier_net(input, num_output_classes,
                                            hidden_layers_dim, num_hidden_layers,
                                            sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, ce, pe, [sgd(netout.parameters(), lr=0.02)])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_samples_per_sweep = 10000
    num_sweeps_to_train_with = 2
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 60

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    for i in range(0, int(num_minibatches_to_train)):
        features, labels = generate_random_data(minibatch_size, input_dim,
                                                num_output_classes)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({input: features, label: labels})
        print_training_progress(trainer, i, training_progress_output_freq)

    test_features, test_labels = generate_random_data(minibatch_size, input_dim,
                                                      num_output_classes)
    avg_error = trainer.test_minibatch({input: test_features, label: test_labels})
    return avg_error
def test_not_replaced_placeholders():

    def wrap_in_block(fun_args, name):
        block_args = [placeholder_variable(name=arg.name) for arg in fun_args]  # placeholders inside the BlockFunction
        combined_block_args = combine(block_args)                               # the content of the BlockFunction
        arg_map = list(zip(block_args, fun_args))                               # after wrapping, the block_args map to args
        combined_args = as_block(composite=combined_block_args,
                                 block_arguments_map=arg_map,
                                 block_op_name=name)
        return combined_args

    input_dim = 2
    x = input_variable(shape=(input_dim,))

    p1 = placeholder_variable()
    p2 = placeholder_variable()

    a = abs(x)
    b = wrap_in_block(list(a.outputs) + [p1], "my_first_block")
    b = wrap_in_block(list(b.outputs) + [p2], "my_second_block")
    b = past_value(b.outputs[0])

    model = b.replace_placeholders({p1: b.outputs[0], p2: b.outputs[0]})

    x0 = [[1, 1], [2, 2]]
    with pytest.raises(RuntimeError):
        model.forward({x: x0}, model.outputs)
def test_scalar_input():
    scalar = input_variable((1,), dtype=np.float32, name='tscalar')
    op = scalar + 1

    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    trainer = Trainer(op, (op, None), sgd(op.parameters, lr_per_sample))
    trainer.train_minibatch({scalar: np.zeros((2, 1), dtype=np.float32)})
def test_proposal_layer():
    cls_prob_shape_cntk = (18, 61, 61)
    cls_prob_shape_caffe = (18, 61, 61)
    rpn_bbox_shape = (36, 61, 61)
    dims_info_shape = (6,)
    im_info = [1000, 1000, 1]

    # Create input tensors with values
    cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32)
    rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32)
    dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32)

    # Create CNTK layer and call forward
    cls_prob_var = input_variable(cls_prob_shape_cntk)
    rpn_bbox_var = input_variable(rpn_bbox_shape)
    dims_info_var = input_variable(dims_info_shape)

    cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var))
    state, cntk_output = cntk_layer.forward({cls_prob_var: [cls_prob],
                                             rpn_bbox_var: [rpn_bbox_pred],
                                             dims_info_var: dims_input})
    cntk_proposals = cntk_output[next(iter(cntk_output))][0]

    # Create Caffe layer and call forward
    cls_prob_caffe = cls_prob.reshape(cls_prob_shape_caffe)
    bottom = [np.array([cls_prob_caffe]), np.array([rpn_bbox_pred]), np.array([im_info])]
    top = None  # handled through return statement in caffe layer for unit testing
    param_str = "'feat_stride': 16"

    caffe_layer = CaffeProposalLayer()
    caffe_layer.set_param_str(param_str)
    caffe_layer.setup(bottom, top)
    caffe_output = caffe_layer.forward(bottom, top)
    caffe_proposals = caffe_output[:, 1:]

    # assert that results are exactly the same
    assert cntk_proposals.shape == caffe_proposals.shape
    assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
    print("Verified ProposalLayer")
def test_output_to_retain():
    in1 = input_variable(shape=(1,))
    labels = input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (ce, errs),
                      [momentum_sgd(z.parameters, lr_per_sample,
                                    momentum_time_constant, True)])

    in1_value = [[[1]], [[2]]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, [z_output])
    assert np.allclose(var_map[z_output], np.asarray(in1_value) + 20)
def test_model_one_output_of_multi_output_function():
    input_dim = 2
    proj_dim = 11
    x = input_variable((input_dim,))

    x_placeholder = placeholder_variable()
    w = parameter((input_dim, proj_dim))
    b = parameter((proj_dim,))
    proj = times(x_placeholder, w)
    proj_plus_bias = proj + b
    combined_model = as_block(combine([proj, proj_plus_bias]),
                              [(x_placeholder, x)], 'dense_op')

    labels = input_variable((proj_dim,))
    lr_schedule = learning_rate_schedule(0.003, UnitType.sample)
    ce = cross_entropy_with_softmax(combined_model.outputs[0], labels)
    pe = classification_error(combined_model.outputs[0], labels)
    trainer_multitask = Trainer(combined_model.outputs[0], (ce, pe),
                                sgd(ce.parameters, lr=lr_schedule))
def _setup_test_model(self, *args, **kwargs):
    inputs = placeholder(shape=(1,))
    outputs = input_variable(shape=(1,), dtype=np.float32)

    q = Dense(1, activation=None)(inputs)
    loss = cross_entropy_with_softmax(q, outputs)

    return {'inputs': inputs, 'outputs': outputs, 'f': q, 'loss': loss}
def test_output_to_retain():
    in1 = input_variable(shape=(1,))
    labels = input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    # legacy Trainer signature: loss and metric passed as separate arguments
    trainer = Trainer(z, ce, errs,
                      [momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant)])

    in1_value = [[[1]], [[2]]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, [z_output])
    assert np.allclose(var_map[z_output], np.asarray(in1_value) + 20)
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height,
                                        num_channels, num_classes)
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name,
                            num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(tl_model, (ce, pe), learner)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)

    return tl_model
def test_eval_sparse_dense(tmpdir, device_id):
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.device import cpu, gpu, set_default_device
    from cntk.ops import input_variable, times
    from scipy.sparse import csr_matrix

    input_vocab_dim = label_vocab_dim = 69

    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features=StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True)
    )), randomize=False, epoch_size=2)

    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(
        shape=input_vocab_dim, dynamic_axes=input_dynamic_axes,
        name='raw_input', is_sparse=True)

    mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
                                  input_map={raw_input: mbs.streams.features})

    z = times(raw_input, np.eye(input_vocab_dim))
    e_reader = z.eval(mb_valid)

    # CSR with the raw_input encoding in ctf_data
    one_hot_data = [
        [3, 4, 5, 4, 7, 12, 1],
        [60, 61]
    ]
    data = [csr_matrix(np.eye(input_vocab_dim, dtype=np.float32)[d])
            for d in one_hot_data]
    e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_csr)])

    # One-hot with the raw_input encoding in ctf_data
    data = one_hot(one_hot_data, num_classes=input_vocab_dim)
    e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_hot)])
def test_entropy(self):
    i = input_variable((2))
    np.testing.assert_almost_equal(
        negative_of_entropy_with_softmax(i).eval({i: [[0.5, 0.5], [1000, 1]]}),
        [-0.693147181, 0])
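# Worked check of the expected values above (an informal note, not from the source):
#   softmax([0.5, 0.5]) = [0.5, 0.5]  ->  entropy = ln 2 ≈ 0.693147, negated: -0.693147
#   softmax([1000, 1])  ≈ [1, 0]      ->  entropy ≈ 0,               negated:  0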
def test_eval_sparse_dense(tmpdir, device_id):
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.ops import input_variable, times

    input_vocab_dim = label_vocab_dim = 69

    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features=StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True)
    )), randomize=False, epoch_size=2)

    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(
        shape=input_vocab_dim, dynamic_axes=input_dynamic_axes,
        name='raw_input', is_sparse=True)

    mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
                                  input_map={raw_input: mbs.streams.features},
                                  device=cntk_device(device_id))

    z = times(raw_input, np.eye(input_vocab_dim))
    e_reader = z.eval(mb_valid, device=cntk_device(device_id))

    # CSR with the raw_input encoding in ctf_data
    one_hot_data = [
        [3, 4, 5, 4, 7, 12, 1],
        [60, 61]
    ]
    data = [csr(np.eye(input_vocab_dim, dtype=np.float32)[d]) for d in one_hot_data]
    e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_csr)])

    # One-hot with the raw_input encoding in ctf_data
    data = one_hot(one_hot_data, num_classes=input_vocab_dim,
                   device=cntk_device(device_id))
    e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_hot)])
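# Assumption: the bare `csr` helper used above (and by the sparse eval tests
# further below) is the SciPy CSR constructor, aliased the same way the other
# variant of test_eval_sparse_dense imports csr_matrix directly:
from scipy.sparse import csr_matrix as csr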
def cifar_resnet():
    image_height = 32
    image_width = 32
    num_channels = 3
    num_classes = 10

    feats_stream_name = 'features'
    labels_stream_name = 'labels'
    minibatch_source = create_mb_source(feats_stream_name, labels_stream_name,
                                        image_height, image_width, num_channels, num_classes)
    features_si = minibatch_source.stream_info(feats_stream_name)
    labels_si = minibatch_source.stream_info(labels_stream_name)

    # Input variables denoting the features and label data
    image_input = input_variable((num_channels, image_height, image_width),
                                 features_si.m_element_type)
    label_var = input_variable((num_classes), features_si.m_element_type)

    # Instantiate the resnet classification model
    classifier_output = resnet_classifer(image_input, num_classes)

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.0078125)
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    mb_size = 32
    training_progress_output_freq = 20
    num_mbs = 1000

    for i in range(0, num_mbs):
        mb = minibatch_source.get_next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {image_input: mb[features_si].m_data,
                     label_var: mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
def ffnet():
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    feature = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    netout = Sequential([For(range(num_hidden_layers),
                             lambda i: Dense(hidden_layers_dim, activation=sigmoid)),
                         Dense(num_output_classes)])(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_parameter_schedule(0.5)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for i in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim,
                                                num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({feature: features, label: labels})

    trainer.summarize_training_progress()

    test_features, test_labels = generate_random_data(minibatch_size, input_dim,
                                                      num_output_classes)
    avg_error = trainer.test_minibatch({feature: test_features, label: test_labels})
    return avg_error
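# --- Sketch of a possible `generate_random_data` helper, as called by the ffnet
# variants in this collection (an assumption: the real helper ships with the
# example and may differ). It draws one Gaussian blob per class and returns
# float32 features with one-hot labels.
def generate_random_data_sketch(sample_size, feature_dim, num_classes):
    Y = np.random.randint(num_classes, size=(sample_size, 1))        # class ids, shape (N, 1)
    X = (np.random.randn(sample_size, feature_dim) + 3) * (Y + 1)    # class-dependent blobs
    X = X.astype(np.float32)
    class_ind = [Y == c for c in range(num_classes)]                 # one boolean column per class
    Y = np.asarray(np.hstack(class_ind), dtype=np.float32)           # one-hot labels, shape (N, num_classes)
    return X, Y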
def create_recurrent_network():
    # Input variables denoting the features and label data
    features = input_variable(((2 * context + 1) * feature_dim))
    labels = input_variable((num_classes))

    # create network
    model = Sequential([For(range(3), lambda: Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce': ce,
        'errs': errs,
        'output': z
    }
def ffnet():
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    input = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Instantiate the feedforward classification model
    netout = fully_connected_classifier_net(input, num_output_classes,
                                            hidden_layers_dim, num_hidden_layers,
                                            sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for i in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim,
                                                num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({input: features, label: labels})

    trainer.summarize_training_progress()

    test_features, test_labels = generate_random_data(minibatch_size, input_dim,
                                                      num_output_classes)
    avg_error = trainer.test_minibatch({input: test_features, label: test_labels})
    return avg_error
def test_eval_sparse_no_seq(batch_index_data, device_id):
    dim = 10
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier * np.eye(dim))
        batch = np.eye(dim)[batch_index_data]
        expected = batch * multiplier
        sparse_val = csr(batch.astype('f'))
        result = z.eval({in1: [sparse_val]}, device=cntk_device(device_id))
        assert np.allclose(result, [expected])
def test_eval_one_hot_seq(one_hot_batch, device_id):
    dim = 10
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
        # Convert CNTK node value to dense so that we can compare it later
        z = times(in1, np.eye(dim) * multiplier)
        # Convert expectation to dense
        expected = [np.eye(dim)[seq] * multiplier for seq in one_hot_batch]
        batch = one_hot(one_hot_batch, num_classes=dim,
                        device=cntk_device(device_id))
        result = z.eval({in1: batch}, device=cntk_device(device_id))
        assert np.all([np.allclose(a, b) for a, b in zip(result, expected)])
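# one_hot() as used in the eval tests is assumed here to be a thin wrapper
# over cntk.Value.one_hot (an assumption; the original import is not shown):
from cntk import Value

def one_hot(batch, num_classes, device=None):
    # Build a (sparse) CNTK Value from sequences of class indices.
    return Value.one_hot(batch, num_classes, device=device)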
def test_disallow_seq_starts_with_Value_objects():
    one_hot_batch = [[2, 5], [0, 1, 6]]
    dim = 10
    in1 = input_variable(shape=(dim,), is_sparse=True)
    z = times(in1, np.eye(dim))
    batch = one_hot(one_hot_batch, num_classes=dim)

    # Sequence-start flags may only accompany raw data, not pre-built Value
    # objects, so both of these calls must raise.
    with pytest.raises(ValueError):
        result = z.eval(({in1: batch}, len(batch) * [True]))

    with pytest.raises(ValueError):
        result = z.eval({in1: (batch, len(batch) * [True])})
def _setup_test_model(self, *args, **kwargs):
    inputs = placeholder(shape=(1,))
    outputs = input_variable(shape=(1,), dtype=np.float32)

    q = Dense(1, activation=None)(inputs)
    loss = cross_entropy_with_softmax(q, outputs)

    return {
        'inputs': inputs,
        'outputs': outputs,
        'f': q,
        'loss': loss
    }
def cifar_resnet():
    image_height = 32
    image_width = 32
    num_channels = 3
    num_classes = 10
    feats_stream_name = 'features'
    labels_stream_name = 'labels'
    minibatch_source = create_mb_source(feats_stream_name, labels_stream_name,
                                        image_height, image_width,
                                        num_channels, num_classes)
    features_si = minibatch_source.stream_info(feats_stream_name)
    labels_si = minibatch_source.stream_info(labels_stream_name)

    # Input variables denoting the features and label data
    image_input = input_variable((num_channels, image_height, image_width),
                                 features_si.m_element_type)
    label_var = input_variable((num_classes), labels_si.m_element_type)

    # Instantiate the resnet classification model
    classifier_output = resnet_classifer(image_input, num_classes)
    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.0078125)
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    mb_size = 32
    training_progress_output_freq = 20
    num_mbs = 1000
    for i in range(0, num_mbs):
        mb = minibatch_source.get_next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {image_input: mb[features_si].m_data,
                     label_var: mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
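# cifar_resnet() calls a print_training_progress() helper that is not defined
# here. A minimal sketch of such a helper, assuming the Trainer's
# previous-minibatch accessors (an assumption about the original):
def print_training_progress(trainer, mb, frequency):
    if mb % frequency == 0:
        loss = trainer.previous_minibatch_loss_average
        error = trainer.previous_minibatch_evaluation_average
        print("Minibatch {}: loss = {:.4f}, error = {:.2f}%".format(
            mb, loss, error * 100))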
def test_eval_sparse_seq_1(batch, device_id):
    dim = 4
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier * np.eye(dim))
        if isinstance(batch[0], list):
            expected = [np.vstack([m.todense() * multiplier for m in seq])
                        for seq in batch]
        else:
            expected = [seq.todense() * multiplier for seq in batch]
        result = z.eval({in1: batch}, device=cntk_device(device_id))
        assert np.all([np.allclose(a, b) for a, b in zip(result, expected)]), \
            "%s != %s" % (result, expected)
def train_and_evaluate(reader_train, reader_test, max_epochs):
    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # Normalize the input
    feature_scale = 1.0 / 256.0
    input_var_norm = element_times(feature_scale, input_var)

    # apply model to input
    z = create_vgg9_model(input_var_norm, num_classes)

    #
    # Training action
    #

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 50000
    minibatch_size = 64

    # Set learning parameters
    lr_per_minibatch = learning_rate_schedule(
        [0.01] * 10 + [0.003] * 10 + [0.001], epoch_size, UnitType.minibatch)
    momentum_time_constant = momentum_as_time_constant_schedule(
        -minibatch_size / np.log(0.9))
    l2_reg_weight = 0.0001

    # trainer object
    learner = momentum_sgd(z.parameters,
                           lr=lr_per_minibatch,
                           momentum=momentum_time_constant,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, ce, pe, learner)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()
    progress_printer = ProgressPrinter(tag='Training')

    # perform model training
    for epoch in range(max_epochs):        # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:   # loop over minibatches in the epoch
            # fetch minibatch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            # count samples processed so far
            sample_count += data[label_var].num_samples
            # log progress
            progress_printer.update_with_trainer(trainer, with_metric=True)
        progress_printer.epoch_summary(with_metric=True)

    #
    # Evaluation action
    #
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    #progress_printer = ProgressPrinter(freq=100, first=10, tag='Eval')
    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch next test minibatch.
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)

        # minibatch data to be evaluated with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    # return evaluation error.
    return metric_numer / metric_denom
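# train_and_evaluate() expects reader_train and reader_test objects exposing
# .streams.features and .streams.labels. A sketch of how such readers are
# typically built with cntk.io (the map_file/mean_file paths and transform
# choices are assumptions, not from the original):
import cntk.io.transforms as xforms
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs

def create_reader(map_file, mean_file, train):
    transforms = []
    if train:
        # modest data augmentation for training only
        transforms += [xforms.crop(crop_type='randomside', side_ratio=0.8)]
    transforms += [
        xforms.scale(width=image_width, height=image_height,
                     channels=num_channels, interpolations='linear'),
        xforms.mean(mean_file)]
    return MinibatchSource(
        ImageDeserializer(map_file, StreamDefs(
            features=StreamDef(field='image', transforms=transforms),
            labels=StreamDef(field='label', shape=num_classes))),
        randomize=train)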