def train_faster_rcnn_e2e(cfg):
    # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
    image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=cfg["MODEL"].FEATURE_NODE_NAME)
    roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')

    # Instantiate the Faster R-CNN prediction model and loss function
    loss, pred_error = create_faster_rcnn_model(image_input, roi_input, dims_node, cfg)

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)
        plot(loss, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_e2e." + cfg["CNTK"].GRAPH_TYPE))

    # Set learning parameters
    e2e_lr_factor = cfg["MODEL"].E2E_LR_FACTOR
    e2e_lr_per_sample_scaled = [x * e2e_lr_factor for x in cfg["CNTK"].E2E_LR_PER_SAMPLE]
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)

    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("lr_per_sample: {}".format(e2e_lr_per_sample_scaled))

    train_model(image_input, roi_input, dims_input, loss, pred_error,
                e2e_lr_per_sample_scaled, mm_schedule, cfg["CNTK"].L2_REG_WEIGHT,
                cfg["CNTK"].E2E_MAX_EPOCHS, cfg)
    return create_faster_rcnn_eval_model(loss, image_input, dims_input, cfg)
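# For orientation, a hedged usage sketch of the end-to-end trainer above.
# The get_configuration() helper and the output file name are assumptions
# modeled on the surrounding example scripts, not confirmed by this source.
if __name__ == '__main__':
    cfg = get_configuration()  # hypothetical config factory
    eval_model = train_faster_rcnn_e2e(cfg)
    eval_model.save(os.path.join(cfg.OUTPUT_PATH, "faster_rcnn_eval.model"))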
def train_faster_rcnn_e2e(base_model_file_name, debug_output=False):
    # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
    image_input = input_variable((num_channels, image_height, image_width),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')

    # Instantiate the Faster R-CNN prediction model and loss function
    loss, pred_error = create_faster_rcnn_predictor(base_model_file_name, image_input, roi_input, dims_node)

    if debug_output:
        print("Storing graphs and models to %s." % globalvars['output_path'])
        plot(loss, os.path.join(globalvars['output_path'], "graph_frcn_train_e2e." + cfg["CNTK"].GRAPH_TYPE))

    # Set learning parameters
    e2e_lr_factor = globalvars['e2e_lr_factor']
    e2e_lr_per_sample_scaled = [x * e2e_lr_factor for x in cfg["CNTK"].E2E_LR_PER_SAMPLE]
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)

    print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
    print("lr_per_sample: {}".format(e2e_lr_per_sample_scaled))

    train_model(image_input, roi_input, dims_input, loss, pred_error,
                e2e_lr_per_sample_scaled, mm_schedule, cfg["CNTK"].L2_REG_WEIGHT,
                globalvars['e2e_epochs'])
    return create_eval_model(loss, image_input, dims_input)
def __init__(self, eval_model, cfg):
    # load model once in constructor and push images through the model in 'process_image()'
    self._img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)
    image_input = input_variable(shape=self._img_shape,
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=cfg["MODEL"].FEATURE_NODE_NAME)
    dims_input = input_variable((1, 6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input')
    self._eval_model = eval_model(image_input, dims_input)
def __init__(self, eval_model, cfg):
    # load model once in constructor and push images through the model in 'process_image()'
    self._img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)
    image_input = input_variable(shape=self._img_shape,
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=cfg["MODEL"].FEATURE_NODE_NAME)
    roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4),
                                   dynamic_axes=[Axis.default_batch_axis()],
                                   name="roi_proposals")
    self._eval_model = eval_model(image_input, roi_proposals)
    self._cfg = cfg
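# The constructor above defers inference to a 'process_image()' method. Below is
# a minimal sketch of such a method, under assumptions NOT confirmed by this
# source: the image has already been resized/padded to self._img_shape, and the
# proposals are a float32 array of shape (cfg.NUM_ROI_PROPOSALS, 4).
def process_image(self, img_data, roi_proposals):
    # bind the two model arguments in declaration order and run one forward pass
    output = self._eval_model.eval({
        self._eval_model.arguments[0]: [img_data],
        self._eval_model.arguments[1]: [roi_proposals]})
    return output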
def eval_and_plot_faster_rcnn(eval_model, num_images_to_plot, test_map_file, img_shape,
                              results_base_path, feature_node_name, classes,
                              drawUnregressedRois=False, drawNegativeRois=False,
                              nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold=0.8):
    # get image paths
    with open(test_map_file) as f:
        content = f.readlines()
    img_base_path = os.path.dirname(os.path.abspath(test_map_file))
    img_file_names = [os.path.join(img_base_path, x.split('\t')[1]) for x in content]

    # prepare model
    image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
    dims_input = input_variable((1, 6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input')
    frcn_eval = eval_model(image_input, dims_input)

    # dims_input_const = cntk.constant([image_width, image_height, image_width, image_height, image_width, image_height], (1, 6))

    print("Plotting results from Faster R-CNN model for %s images." % num_images_to_plot)
    for i in range(0, num_images_to_plot):
        imgPath = img_file_names[i]

        # evaluate single image
        _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1])

        dims_input = np.array(dims, dtype=np.float32)
        dims_input.shape = (1,) + dims_input.shape
        output = frcn_eval.eval({frcn_eval.arguments[0]: [cntk_img_input],
                                 frcn_eval.arguments[1]: dims_input})

        out_dict = dict([(k.name, k) for k in output])
        out_cls_pred = output[out_dict['cls_pred']][0]
        out_rpn_rois = output[out_dict['rpn_rois']][0]
        out_bbox_regr = output[out_dict['bbox_regr']][0]

        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1).tolist()

        if drawUnregressedRois:
            # plot results without final regression
            imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois, img_shape[2], img_shape[1],
                                              classes, nmsKeepIndices=None,
                                              boDrawNegativeRois=drawNegativeRois,
                                              decisionThreshold=bgrPlotThreshold)
            imsave("{}/{}_{}".format(results_base_path, i, os.path.basename(imgPath)), imgDebug)

        # apply regression and nms to bbox coordinates
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)

        nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
                                                           nms_threshold=nmsThreshold,
                                                           conf_threshold=nmsConfThreshold)

        img = visualizeResultsFaster(imgPath, labels, scores, regressed_rois, img_shape[2], img_shape[1],
                                     classes, nmsKeepIndices=nmsKeepIndices,
                                     boDrawNegativeRois=drawNegativeRois,
                                     decisionThreshold=bgrPlotThreshold)
        imsave("{}/{}_regr_{}".format(results_base_path, i, os.path.basename(imgPath)), img)
def test_op_reduce_over_batch_axis(input_data, device_id, precision):
    from .. import reduce_sum, reduce_max, reduce_min, reduce_mean, reduce_log_sum_exp, reduce_prod
    from cntk import Axis

    dt = PRECISION_TO_TYPE[precision]
    data = AA(input_data, dtype=dt)

    a = C.input_variable(shape=data.shape[1:],
                         dtype=sanitize_dtype_cntk(dt),
                         needs_gradient=True,
                         name='a')

    ops = [
        (reduce_sum, lambda x: np.sum(x, axis=0, keepdims=False), lambda x, f: np.ones_like(x)),
        (reduce_max, lambda x: np.amax(x, axis=0, keepdims=False), lambda x, f: min_max_bwd(x, f, dt)),
        (reduce_min, lambda x: np.amin(x, axis=0, keepdims=False), lambda x, f: min_max_bwd(x, f, dt)),
        (reduce_mean, lambda x: np.mean(x, axis=0, keepdims=False), lambda x, f: np.ones_like(x) / x.shape[0]),
        (reduce_log_sum_exp, lambda x: np.log(np.sum(np.exp(x), axis=0, keepdims=False)), lambda x, f: np.exp(x - f)),
        (reduce_prod, lambda x: np.prod(x, axis=0, keepdims=False), lambda x, f: f / x)
    ]

    for op, fwd, bwd in ops:
        input_op = op(a, axis=Axis.default_batch_axis())
        expected_forward = fwd(data)
        expected_backward = bwd(data, expected_forward)
        binding = {a: data}
        actual_backward = input_op.grad(binding)
        actual_forward = input_op.eval(binding)
        assert np.allclose(actual_forward, expected_forward)
        for ab, eb in zip(actual_backward, expected_backward):
            assert np.allclose(ab, eb)
def test_model_not_criterion_subset():
    input_dim = 2
    proj_dim = 11
    model1_dim = 3
    model2_dim = 4
    x = input_variable((input_dim,))

    core = Embedding(proj_dim)
    model1 = Dense(model1_dim)(sequence.last(core(x)))
    model1_label = input_variable((model1_dim,), dynamic_axes=[Axis.default_batch_axis()])
    ce_model1 = cross_entropy_with_softmax(model1, model1_label)
    pe_model1 = classification_error(model1, model1_label)

    model2 = Dense(model2_dim)(core(x))
    model2_label = input_variable((model2_dim,))
    ce_model2 = cross_entropy_with_softmax(model2, model2_label)
    pe_model2 = classification_error(model2, model2_label)

    ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1

    lr_schedule = learning_rate_schedule(0.003, UnitType.sample)
    trainer_multitask = Trainer(model1, (ce, pe_model1), sgd(ce.parameters, lr=lr_schedule))

    x_data = np.asarray([[2., 1.], [1., 2.]], np.float32)
    model1_label_data = np.asarray([1., 0., 0.], np.float32)
    model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32)
    trainer_multitask.train_minibatch({x: [x_data],
                                       model1_label: [model1_label_data],
                                       model2_label: [model2_label_data]})
def test_eval_sparse_dense(tmpdir, device_id):
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.device import cpu, gpu, set_default_device
    from cntk.ops import input_variable, times
    from scipy.sparse import csr_matrix

    input_vocab_dim = label_vocab_dim = 69

    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features=StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True)
    )), randomize=False, epoch_size=2)

    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(
        shape=input_vocab_dim, dynamic_axes=input_dynamic_axes,
        name='raw_input', is_sparse=True)

    mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
                                  input_map={raw_input: mbs.streams.features})

    z = times(raw_input, np.eye(input_vocab_dim))
    e_reader = z.eval(mb_valid)

    # CSR with the raw_input encoding in ctf_data
    one_hot_data = [
        [3, 4, 5, 4, 7, 12, 1],
        [60, 61]
    ]
    data = [csr_matrix(np.eye(input_vocab_dim, dtype=np.float32)[d]) for d in one_hot_data]
    e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_csr)])

    # One-hot with the raw_input encoding in ctf_data
    data = one_hot(one_hot_data, num_classes=input_vocab_dim)
    e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_hot)])
def create_inputs(vocab_dim):
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    input_sequence = input_variable(shape=vocab_dim, dynamic_axes=input_dynamic_axes)
    label_sequence = input_variable(shape=vocab_dim, dynamic_axes=input_dynamic_axes)

    return input_sequence, label_sequence
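# A quick usage sketch of create_inputs(): wire the two sequence variables into
# a toy criterion. The vocab size and the one-layer Dense model are
# illustrative assumptions, not part of the original source.
input_sequence, label_sequence = create_inputs(vocab_dim=255)
z = Dense(255)(input_sequence)  # placeholder model over the input sequence
ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)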
def test_op_reduce_mean_all_constant(input_data, axis, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    value = AA(input_data, dtype=dt)
    from .. import reduce_mean
    from cntk import Axis, Constant
    a = Constant(value, name='a')
    input_op = reduce_mean(a, axis=Axis.all_axes())
    expected_forward = AA(np.mean(value))
    actual_forward = input_op.eval()
    assert np.allclose(actual_forward, expected_forward)
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes, dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)
    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample))

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        print_training_progress(trainer, i, training_progress_output_freq)

    import copy
    evaluation_average = copy.copy(trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes, dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features, num_output_classes,
                                                    embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim, True, 'x'),
        StreamConfiguration(labels_stream_name, num_output_classes, False, 'y')], 0)

    features_si = mb_source.stream_info(features)
    labels_si = mb_source.stream_info(label)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.0005)
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {features: mb[features_si].m_data, label: mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        i += 1
def test_op_reduce_mean_all_constant(input_data, axis, device_id, precision):
    # dt = PRECISION_TO_TYPE[precision]
    # FIXME: we'd like to do dt = PRECISION_TO_TYPE[precision]
    # however there seems to be an issue with actual_forward below
    # that gets computed correctly but by the time np.allclose executes
    # it contains garbage values. The problem goes away if one uses
    # actual_forward = np.copy(input_op.eval())
    dt = np.float32
    value = AA(input_data, dtype=dt)
    from .. import reduce_mean
    from cntk import Axis, Constant
    a = Constant(value, name='a')
    input_op = reduce_mean(a, axis=Axis.all_axes())
    expected_forward = AA(np.mean(value))
    actual_forward = input_op.eval()
    assert np.allclose(actual_forward, expected_forward)
def test_op_reduce_all(input_data, axis, device_id, precision):
    # FIXME: we'd like to do dt = PRECISION_TO_TYPE[precision]
    # however there seems to be an issue with actual_forward below
    # that gets computed correctly but by the time np.allclose executes
    # it contains garbage values. The problem goes away if one uses
    # actual_forward = np.copy(input_op.eval(binding))
    dt = np.float32
    data = AA(input_data, dtype=dt)
    a = I(shape=data.shape,
          dtype=sanitize_dtype_cntk(dt),
          needs_gradient=True,
          name='a')
    # create batch
    value = [AA([data, data - 0.5], dtype=dt), AA([data + 0.25], dtype=dt)]
    from .. import reduce_sum, reduce_max, reduce_min, reduce_mean, reduce_log_sum_exp, reduce_prod
    from cntk import Axis

    def max_bwd(x, f):
        y = np.zeros_like(x)
        yr = y.ravel()
        xr = x.ravel()
        for i in range(x.size):
            if xr[i] == f:
                yr[i] = 1
        return y

    ops = [
        (reduce_sum, lambda x: AA(sum(np.sum(xi) for xi in x)), lambda x, f: [np.ones_like(xi) for xi in x]),
        (reduce_max, lambda x: AA(max(np.max(xi) for xi in x)), lambda x, f: [max_bwd(xi, f) for xi in x]),
        (reduce_min, lambda x: AA(min(np.min(xi) for xi in x)), lambda x, f: [max_bwd(xi, f) for xi in x]),
        (reduce_mean, lambda x: AA(sum(np.sum(xi) for xi in x) / sum(xi.size for xi in x)), lambda x, f: [np.ones_like(xi) / sum(xj.size for xj in x) for xi in x]),
        (reduce_log_sum_exp, lambda x: AA(np.log(sum(np.sum(np.exp(xi)) for xi in x))), lambda x, f: [np.exp(xi - f) for xi in x]),
        (reduce_prod, lambda x: AA(np.prod([np.prod(xi) for xi in x])), lambda x, f: [f / xi for xi in x])
    ]

    for op, fwd, bwd in ops:
        input_op = op(a, axis=Axis.all_axes())
        expected_forward = fwd(value)
        expected_backward = bwd(value, expected_forward)
        binding = {a: value}
        actual_backward = input_op.grad(binding)[0]
        actual_forward = np.copy(input_op.eval(binding))
        assert np.allclose(actual_forward, expected_forward)
        for ab, eb in zip(actual_backward, expected_backward):
            assert np.allclose(ab, eb)
def test_op_reduce_over_batch_axis(input_data, device_id, precision):
    from .. import reduce_sum, reduce_max, reduce_min, reduce_mean, reduce_log_sum_exp, reduce_prod
    from cntk import Axis

    dt = PRECISION_TO_TYPE[precision]
    data = AA(input_data, dtype=dt)

    a = C.input_variable(shape=data.shape[1:],
                         dtype=sanitize_dtype_cntk(dt),
                         needs_gradient=True,
                         name='a')

    def min_max_bwd(x, f):
        forward_array = np.asarray(f, dtype=dt)
        min_max_elements = forward_array.reshape(forward_array.size).tolist()

        # place 1.0s where minimum or maximum elements are
        backward = np.zeros_like(x)
        for element in min_max_elements:
            backward += np.asarray(x == element)

        return backward

    ops = [
        (reduce_sum, lambda x: np.sum(x, axis=0, keepdims=False), lambda x, f: np.ones_like(x)),
        (reduce_max, lambda x: np.amax(x, axis=0, keepdims=False), lambda x, f: min_max_bwd(x, f)),
        (reduce_min, lambda x: np.amin(x, axis=0, keepdims=False), lambda x, f: min_max_bwd(x, f)),
        (reduce_mean, lambda x: np.mean(x, axis=0, keepdims=False), lambda x, f: np.ones_like(x) / x.shape[0]),
        (reduce_log_sum_exp, lambda x: np.log(np.sum(np.exp(x), axis=0, keepdims=False)), lambda x, f: np.exp(x - f)),
        (reduce_prod, lambda x: np.prod(x, axis=0, keepdims=False), lambda x, f: f / x)
    ]

    for op, fwd, bwd in ops:
        input_op = op(a, axis=Axis.default_batch_axis())
        expected_forward = fwd(data)
        expected_backward = bwd(data, expected_forward)
        binding = {a: data}
        actual_backward = input_op.grad(binding)
        actual_forward = input_op.eval(binding)
        assert np.allclose(actual_forward, expected_forward)
        for ab, eb in zip(actual_backward, expected_backward):
            assert np.allclose(ab, eb)
def test_recurrent_block(block_type, block_outputs_count, block_size, W_mult, H_mult, expected_res):
    input_shape = 4

    sequenceAxis = Axis('sequenceAxis')

    y = C.input_variable(input_shape, dynamic_axes=[Axis.default_batch_axis(), sequenceAxis])
    data = np.reshape(np.arange(0, 16, dtype=np.float32), (1, 4, 4))

    rnn_block = block_type(block_size, init=0.1)

    assert len(rnn_block.outputs) == block_outputs_count
    rnn_net = Recurrence(rnn_block)(y)

    assert rnn_net.b.shape == (W_mult * block_size,)
    assert rnn_net.W.shape == (input_shape, W_mult * block_size)
    assert rnn_net.H.shape == (block_size, H_mult * block_size)

    res = rnn_net.eval(data)
    expected = np.asarray(expected_res, dtype=np.float32)

    np.testing.assert_array_almost_equal(res[0], expected, decimal=6)
def test_op_reduce_all(input_data, axis, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    data = AA(input_data, dtype=dt)
    a = C.sequence.input_variable(shape=data.shape,
                                  dtype=sanitize_dtype_cntk(dt),
                                  needs_gradient=True,
                                  name='a')
    # create batch
    value = [AA([data, data - 0.5], dtype=dt), AA([data + 0.25], dtype=dt)]
    from .. import reduce_sum, reduce_max, reduce_min, reduce_mean, reduce_log_sum_exp, reduce_prod
    from cntk import Axis

    def max_bwd(x, f):
        y = np.zeros_like(x)
        yr = y.ravel()
        xr = x.ravel()
        for i in range(x.size):
            if xr[i] == f:
                yr[i] = 1
        return y

    ops = [
        (reduce_sum, lambda x: AA(sum(np.sum(xi) for xi in x)), lambda x, f: [np.ones_like(xi) for xi in x]),
        (reduce_max, lambda x: AA(max(np.max(xi) for xi in x)), lambda x, f: [max_bwd(xi, f) for xi in x]),
        (reduce_min, lambda x: AA(min(np.min(xi) for xi in x)), lambda x, f: [max_bwd(xi, f) for xi in x]),
        (reduce_mean, lambda x: AA(sum(np.sum(xi) for xi in x) / sum(xi.size for xi in x)), lambda x, f: [np.ones_like(xi) / sum(xj.size for xj in x) for xi in x]),
        (reduce_log_sum_exp, lambda x: AA(np.log(sum(np.sum(np.exp(xi)) for xi in x))), lambda x, f: [np.exp(xi - f) for xi in x]),
        (reduce_prod, lambda x: AA(np.prod([np.prod(xi) for xi in x])), lambda x, f: [f / xi for xi in x])
    ]

    for op, fwd, bwd in ops:
        input_op = op(a, axis=Axis.all_axes())
        expected_forward = fwd(value)
        expected_backward = bwd(value, expected_forward)
        binding = {a: value}
        actual_backward = input_op.grad(binding)
        actual_forward = input_op.eval(binding)
        assert np.allclose(actual_forward, expected_forward)
        for ab, eb in zip(actual_backward, expected_backward):
            assert np.allclose(ab, eb)
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes, dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim, True, 'x'),
        StreamConfiguration(labels_stream_name, num_output_classes, False, 'y')], 0)

    features_si = mb_source[features]
    labels_si = mb_source[label]

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd(classifier_output.parameters(), lr=0.0005)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {features: mb[features_si], label: mb[labels_si]}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        i += 1

    import copy
    evaluation_average = copy.copy(trainer.previous_minibatch_evaluation_average())
    loss_average = copy.copy(trainer.previous_minibatch_loss_average())

    return evaluation_average, loss_average
def evalImage(url):
    # set image
    eval_model = load_model(model_path)
    classes = globalvars['classes']
    image_input = input_variable((num_channels, image_height, image_width),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, dims_input)

    # Create the minibatch source
    minibatch_source = ObjectDetectionMinibatchSource(
        url,
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width,
        pad_height=image_height,
        pad_value=img_pad_value,
        randomize=False,
        use_flipping=False,
        max_images=cfg["CNTK"].NUM_TEST_IMAGES)

    # define mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    # evaluate test images and write network output to file
    all_gt_infos = {key: [] for key in classes}
    img_i = 0
    mb_data = minibatch_source.next_minibatch(url, 1, input_map=input_map)

    gt_row = mb_data[roi_input].asarray()
    gt_row = gt_row.reshape((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5))
    all_gt_boxes = gt_row[np.where(gt_row[:, -1] > 0)]

    for cls_index, cls_name in enumerate(classes):
        if cls_index == 0:
            continue
        cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:, -1] == cls_index)]
        all_gt_infos[cls_name].append({
            'bbox': np.array(cls_gt_boxes),
            'difficult': [False] * len(cls_gt_boxes),
            'det': [False] * len(cls_gt_boxes)
        })

    output = frcn_eval.eval({
        image_input: mb_data[image_input],
        dims_input: mb_data[dims_input]
    })

    out_dict = dict([(k.name, k) for k in output])
    out_cls_pred = output[out_dict['cls_pred']][0]
    out_rpn_rois = output[out_dict['rpn_rois']][0]
    out_bbox_regr = output[out_dict['bbox_regr']][0]

    labels = out_cls_pred.argmax(axis=1)
    scores = out_cls_pred.max(axis=1)

    result = dict()
    for label in LabelList:
        result.update({label: 0})

    for index, label in enumerate(labels):
        if result[LabelList[int(label)]] < scores[index]:
            result.update({LabelList[int(label)]: scores[index]})

    pp = pprint.PrettyPrinter(indent=4)
    print("---------------------")
    print(url)
    pp.pprint(result)
    print("---------------------")

    for number, accuracy in result.items():
        result.update({number: str(accuracy)})

    return result
def train_sequence_to_sequence_translator():
    input_vocab_dim = 69
    label_vocab_dim = 69

    hidden_dim = 512
    num_layers = 2

    # Source and target inputs to the model
    input_dynamic_axes = [Axis('inputAxis'), Axis.default_batch_axis()]
    raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes)

    label_dynamic_axes = [Axis('labelAxis'), Axis.default_batch_axis()]
    raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes)

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = slice(raw_labels, label_dynamic_axes[0], 1, 0)
    label_sentence_start = sequence.first(raw_labels)

    is_first_label = sequence.is_first(label_sequence)
    label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            encoder_outputH, hidden_dim, hidden_dim, future_value, future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence)

    # Decoder
    decoder_history_from_ground_truth = label_sequence
    decoder_input = element_select(is_first_label, label_sentence_start_scattered,
                                   past_value(decoder_history_from_ground_truth))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i == 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            decoder_outputH, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)

    decoder_output = decoder_outputH
    decoder_dim = hidden_dim

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'),
        StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1')], 10000)

    features_si = mb_source.stream_info(feature_stream_name)
    labels_si = mb_source.stream_info(labels_stream_name)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.007)
    momentum_time_constant = 1100
    momentum_per_sample = momentums_per_sample(math.exp(-1.0 / momentum_time_constant))
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True

    trainer = Trainer(z, ce, errs,
                      [momentum_sgd_learner(z.owner.parameters(), lr, momentum_per_sample,
                                            clipping_threshold_per_sample,
                                            gradient_clipping_with_truncation)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 72
    training_progress_output_freq = 10
    i = 0  # minibatch counter (missing in the original; needed by print_training_progress below)
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {raw_input: mb[features_si].m_data,
                     raw_labels: mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        i += 1
def train_faster_rcnn_alternating(cfg):
    '''
    4-Step Alternating Training scheme from the Faster R-CNN paper:

    # Create initial network, only rpn, without detection network
        # --> train only the rpn (and conv3_1 and up for VGG16)
    # buffer region proposals from rpn
    # Create full network, initialize conv layers with imagenet, use buffered proposals
        # --> train only detection network (and conv3_1 and up for VGG16)
    # Keep conv weights from detection network and fix them
        # --> train only rpn
    # buffer region proposals from rpn
    # Keep conv and rpn weights from step 3 and fix them
        # --> train only detection network
    '''

    # setting pre- and post-nms top N to training values since buffered proposals are used for further training
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N
    cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
    cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

    # Learning parameters
    rpn_lr_factor = cfg["MODEL"].RPN_LR_FACTOR
    rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
    frcn_lr_factor = cfg["MODEL"].FRCN_LR_FACTOR
    frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]

    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
    rpn_epochs = cfg["CNTK"].RPN_EPOCHS
    frcn_epochs = cfg["CNTK"].FRCN_EPOCHS

    feature_node_name = cfg["MODEL"].FEATURE_NODE_NAME
    last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME

    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
    print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))

    debug_output = cfg["CNTK"].DEBUG_OUTPUT
    if debug_output:
        print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)

    # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
    image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    feat_norm = image_input - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR])
    roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    scaled_gt_boxes = alias(roi_input, name='roi_input')
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')
    rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()])
    rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

    # base image classification model (e.g. VGG16 or AlexNet)
    base_model = load_model(cfg['BASE_MODEL_PATH'])

    print("stage 1a - rpn")
    if True:
        # Create initial network, only rpn, without detection network
        #           initial weights     train?
        # conv:     base_model          only conv3_1 and up
        # rpn:      init new            yes
        # frcn:     -                   -

        # conv layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # RPN and losses
        rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, cfg)
        stage1_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage1_rpn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

    print("stage 1a - buffering rpn proposals")
    buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input, cfg)

    print("stage 1b - frcn")
    if True:
        # Create full network, initialize conv layers with imagenet, fix rpn weights
        #           initial weights     train?
        # conv:     base_model          only conv3_1 and up
        # rpn:      stage1a rpn model   no --> use buffered proposals
        # frcn:     base_model + new    yes

        # conv_layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # use buffered proposals in target layer
        rois, label_targets, bbox_targets, bbox_inside_weights = \
            create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, cfg)

        # Fast RCNN and losses
        fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME],
                                [cfg["MODEL"].LAST_HIDDEN_NODE_NAME], CloneMethod.clone)
        cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg)
        detection_losses = create_detection_losses(cls_score, label_targets, bbox_pred, rois,
                                                   bbox_targets, bbox_inside_weights, cfg)
        pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
        stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])

        # train
        if debug_output:
            plot(stage1_frcn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
        buffered_proposals_s1 = None

    print("stage 2a - rpn")
    if True:
        # Keep conv weights from detection network and fix them
        #           initial weights     train?
        # conv:     stage1b frcn model  no
        # rpn:      stage1a rpn model   yes
        # frcn:     -                   -

        # conv_layers
        conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # RPN and losses
        rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"],
                          ["rpn_rois", "rpn_losses"], CloneMethod.clone)
        rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
        rpn_rois = rpn_net.outputs[0]
        rpn_losses = rpn_net.outputs[1]
        stage2_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage2_rpn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

    print("stage 2a - buffering rpn proposals")
    buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input, cfg)

    print("stage 2b - frcn")
    if True:
        # Keep conv and rpn weights from step 3 and fix them
        #           initial weights     train?
        # conv:     stage2a rpn model   no
        # rpn:      stage2a rpn model   no --> use buffered proposals
        # frcn:     stage1b frcn model  yes

        # conv_layers
        conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # Fast RCNN and losses
        frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
                           ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"],
                           CloneMethod.clone)
        stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
        detection_losses = stage2_frcn_network.outputs[3]
        pred_error = stage2_frcn_network.outputs[4]

        # train
        if debug_output:
            plot(stage2_frcn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
        buffered_proposals_s2 = None

    # resetting config values to original test values
    cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
    cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return create_faster_rcnn_eval_model(stage2_frcn_network, image_input, dims_input, cfg,
                                         rpn_model=stage2_rpn_network)
def sequence_to_sequence_translator(debug_output=False, run_test=False):
    input_vocab_dim = 69
    label_vocab_dim = 69

    # network complexity; initially low for faster testing
    hidden_dim = 256
    num_layers = 1

    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input')

    label_dynamic_axes = [batch_axis, label_seq_axis]
    raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes, name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = sequence.slice(raw_labels, 1, 0)  # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)  # <s>

    is_first_label = sequence.is_first(label_sequence)  # <s> 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence)

    # Decoder
    decoder_history_hook = alias(label_sequence, name='decoder_history_hook')  # copy label_sequence

    decoder_input = element_select(is_first_label, label_sentence_start_scattered,
                                   past_value(decoder_history_hook))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)

    decoder_output = decoder_outputH

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)

    # Criterion nodes
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # network output for decoder history
    net_output = hardmax(z)

    # make a clone of the graph where the ground truth is replaced by the network output
    ng = z.clone(CloneMethod.share, {decoder_history_hook.output: net_output.output})

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters,
                           lr_per_minibatch, momentum_time_constant,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, ce, errs, learner)

    # setup data
    train_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "Data", "cmudict-0.7b.train-dev-20-21.ctf")
    valid_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "Data", "tiny.ctf")

    # readers
    randomize_data = True
    if run_test:
        randomize_data = False  # because we want to get an exact error

    train_reader = create_reader(train_path, randomize_data, input_vocab_dim, label_vocab_dim)
    train_bind = {
        raw_input: train_reader.streams.features,
        raw_labels: train_reader.streams.labels
    }

    # get the vocab for printing output sequences in plaintext
    vocab_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "Data", "cmudict-0.7b.mapping")
    vocab = [w.strip() for w in open(vocab_path).readlines()]
    i2w = {i: ch for i, ch in enumerate(vocab)}

    # Get minibatches of sequences to train with and perform model training
    i = 0
    mbs = 0
    minibatch_size = 72
    epoch_size = 908241
    max_epochs = 10
    training_progress_output_freq = 500

    # make things more basic for running a quicker test
    if run_test:
        epoch_size = 5000
        max_epochs = 1
        training_progress_output_freq = 30

    valid_reader = create_reader(valid_path, False, input_vocab_dim, label_vocab_dim)
    valid_bind = {
        find_arg_by_name('raw_input', ng): valid_reader.streams.features,
        find_arg_by_name('raw_labels', ng): valid_reader.streams.labels
    }

    for epoch in range(max_epochs):
        loss_numer = 0
        metric_numer = 0
        denom = 0

        while i < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size, input_map=train_bind)
            trainer.train_minibatch(mb_train)

            # collect epoch-wide stats
            samples = trainer.previous_minibatch_sample_count
            loss_numer += trainer.previous_minibatch_loss_average * samples
            metric_numer += trainer.previous_minibatch_evaluation_average * samples
            denom += samples

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % training_progress_output_freq == 0:
                mb_valid = valid_reader.next_minibatch(minibatch_size, input_map=valid_bind)
                e = ng.eval(mb_valid)
                print_sequences(e, i2w)

            print_training_progress(trainer, mbs, training_progress_output_freq)
            i += mb_train[raw_labels].num_samples
            mbs += 1

        print("--- EPOCH %d DONE: loss = %f, errs = %f ---" % (epoch, loss_numer / denom, 100.0 * (metric_numer / denom)))

    error1 = translator_test_error(z, trainer, input_vocab_dim, label_vocab_dim)

    z.save_model("seq2seq.dnn")
    z.restore_model("seq2seq.dnn")

    label_seq_axis = Axis('labelAxis')
    label_sequence = sequence.slice(find_arg_by_name('raw_labels', z), 1, 0)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)
    trainer = Trainer(z, ce, errs, [momentum_sgd(z.parameters, lr_per_minibatch, momentum_time_constant,
                                                 True, clipping_threshold_per_sample,
                                                 gradient_clipping_with_truncation)])

    error2 = translator_test_error(z, trainer, input_vocab_dim, label_vocab_dim)

    assert error1 == error2

    return error1
def eval_faster_rcnn(eval_model, imgPath, img_shape,
                     results_base_path, feature_node_name, classes, mode,
                     drawUnregressedRois=False, drawNegativeRois=False,
                     nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold=0.8):
    # prepare model
    image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
    dims_input = input_variable((1, 6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input')

    # If the model hasn't been loaded properly, or from the proper place, this is
    # the line to look at: it applies the model to the input variables in
    # preparation for evaluation.
    try:
        frcn_eval = eval_model(image_input, dims_input)
    except:
        raise TypeError("Loading existing model from %s" % model_path)

    print("Plotting results from Faster R-CNN model for image.")
    _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1])

    dims_input = np.array(dims, dtype=np.float32)
    dims_input.shape = (1,) + dims_input.shape
    output = frcn_eval.eval({
        frcn_eval.arguments[0]: [cntk_img_input],
        frcn_eval.arguments[1]: dims_input
    })

    out_dict = dict([(k.name, k) for k in output])
    out_cls_pred = output[out_dict['cls_pred']][0]
    out_rpn_rois = output[out_dict['rpn_rois']][0]
    out_bbox_regr = output[out_dict['bbox_regr']][0]

    labels = out_cls_pred.argmax(axis=1)
    scores = out_cls_pred.max(axis=1).tolist()

    if mode == "returntags":
        class Tag(object):
            def __init__(self, label, score, bbox):
                self.label = label
                self.score = score
                self.bbox = bbox

            def serialize(self):
                return {
                    'label': self.label,
                    'score': self.score,
                    'bbox': self.bbox,
                }

        results = []
        for i in range(len(out_rpn_rois)):
            if labels[i] != 0:
                x = Tag(str(classes[labels[i]]), str(scores[i]), str(out_rpn_rois[i]))
                results.append(x)

        return results

    elif mode == "returnimage":
        evaluated_image_path = "{}/{}".format(results_base_path, 'evaluated_' + os.path.basename(imgPath))
        if drawUnregressedRois:
            # plot results without final regression
            imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois, img_shape[2], img_shape[1],
                                              classes, nmsKeepIndices=None,
                                              boDrawNegativeRois=drawNegativeRois,
                                              decisionThreshold=bgrPlotThreshold)
            imsave(evaluated_image_path, imgDebug)
        else:
            # apply regression and nms to bbox coordinates
            regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
            nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
                                                               nms_threshold=nmsThreshold,
                                                               conf_threshold=nmsConfThreshold)
            img = visualizeResultsFaster(imgPath, labels, scores, regressed_rois, img_shape[2], img_shape[1],
                                         classes, nmsKeepIndices=nmsKeepIndices,
                                         boDrawNegativeRois=drawNegativeRois,
                                         decisionThreshold=bgrPlotThreshold)
            imsave(evaluated_image_path, img)

        return evaluated_image_path
    else:
        raise ValueError("Unsupported value found in 'mode' parameter")
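# A hedged usage sketch of eval_faster_rcnn() in "returntags" mode. The model
# file, image path, shape, and class list below are illustrative placeholders,
# not values taken from this source.
model = load_model("faster_rcnn_eval.model")  # hypothetical model path
tags = eval_faster_rcnn(model, "test.jpg", (3, 850, 850),
                        results_base_path="output",
                        feature_node_name="features",
                        classes=('__background__', 'dog', 'cat'),
                        mode="returntags")
for tag in tags:
    print(tag.serialize())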
########################
# define the reader    #
########################

def create_reader(path, is_training):
    return MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True)
    )), randomize=is_training, max_sweeps=INFINITELY_REPEAT if is_training else 1)

########################
# define the model     #
########################

# type annotations for the two sequence types; later use InputSequence[Tensor[input_vocab_dim]]
# CNTK considers these two different types since they run over different sequence indices.
inputAxis = Axis('inputAxis')
labelAxis = Axis('labelAxis')
InputSequence = SequenceOver[inputAxis]
LabelSequence = SequenceOver[labelAxis]

# create the s2s model
def create_model():  # :: (history*, input*) -> logP(w)*
    # Embedding: (input*) --> embedded_input*
    # Right now assumes shared embedding and shared vocab size.
    embed = Embedding(embedding_dim, name='embed') if use_embedding else identity

    # Encoder: (input*) --> (h0, c0)
    # Create multiple layers of LSTMs by passing the output of the i-th layer
    # to the (i+1)th layer as its input
    # This is the plain s2s encoder. The attention encoder will keep the entire sequence instead.
    # Note: We go_backwards for the plain model, but forward for the attention model.
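# Illustrative only (not from this source): the SequenceOver declarations above
# let you type a CNTK Function's argument as a sequence over a specific axis.
# Signature and Tensor come from cntk.layers.typing; input_vocab_dim is assumed
# to be defined earlier in the script.
@Function
@Signature(InputSequence[Tensor[input_vocab_dim]])
def select_last(x):
    # reduce a variable-length input sequence to its final element
    return sequence.last(x)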
def create_network(input_vocab_dim, label_vocab_dim):
    # network complexity; initially low for faster testing
    hidden_dim = 256
    num_layers = 1

    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input')

    label_dynamic_axes = [batch_axis, label_seq_axis]
    raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes, name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = sequence.slice(raw_labels, 1, 0)  # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)  # <s>

    is_first_label = sequence.is_first(label_sequence)  # <s> 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence)

    # Decoder
    decoder_history_hook = alias(label_sequence, name='decoder_history_hook')  # copy label_sequence

    decoder_input = element_select(is_first_label, label_sentence_start_scattered,
                                   past_value(decoder_history_hook))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)

    decoder_output = decoder_outputH

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)

    # Criterion nodes
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # network output for decoder history
    net_output = hardmax(z)

    # make a clone of the graph where the ground truth is replaced by the network output
    ng = z.clone(CloneMethod.share, {decoder_history_hook.output: net_output.output})

    return {
        'raw_input': raw_input,
        'raw_labels': raw_labels,
        'ce': ce,
        'pe': errs,
        'ng': ng,
        'output': z
    }
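# Illustrative only: consuming the dictionary returned by create_network() to
# drive training. The learner settings are placeholders, not from this source.
net = create_network(input_vocab_dim=69, label_vocab_dim=69)
learner = momentum_sgd(net['output'].parameters,
                       learning_rate_schedule(0.5, UnitType.minibatch),
                       momentum_as_time_constant_schedule(1100))
trainer = Trainer(net['output'], (net['ce'], net['pe']), learner)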
def eval_faster_rcnn_mAP(eval_model): img_map_file = globalvars['test_map_file'] roi_map_file = globalvars['test_roi_file'] classes = globalvars['classes'] image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name) roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) frcn_eval = eval_model(image_input, dims_input) # Create the minibatch source minibatch_source = ObjectDetectionMinibatchSource( img_map_file, roi_map_file, max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE, pad_width=image_width, pad_height=image_height, pad_value=img_pad_value, randomize=False, use_flipping=False, max_images=cfg["CNTK"].NUM_TEST_IMAGES) # define mapping from reader streams to network inputs input_map = { minibatch_source.image_si: image_input, minibatch_source.roi_si: roi_input, minibatch_source.dims_si: dims_input } # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in range(num_test_images)] for _ in range(globalvars['num_classes'])] # evaluate test images and write network output to file print("Evaluating Faster R-CNN model for %s images." % num_test_images) all_gt_infos = {key: [] for key in classes} for img_i in range(0, num_test_images): mb_data = minibatch_source.next_minibatch(1, input_map=input_map) gt_row = mb_data[roi_input].asarray() gt_row = gt_row.reshape((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5)) all_gt_boxes = gt_row[np.where(gt_row[:, -1] > 0)] for cls_index, cls_name in enumerate(classes): if cls_index == 0: continue cls_gt_boxes = all_gt_boxes[np.where( all_gt_boxes[:, -1] == cls_index)] all_gt_infos[cls_name].append({ 'bbox': np.array(cls_gt_boxes), 'difficult': [False] * len(cls_gt_boxes), 'det': [False] * len(cls_gt_boxes) }) output = frcn_eval.eval({ image_input: mb_data[image_input], dims_input: mb_data[dims_input] }) out_dict = dict([(k.name, k) for k in output]) out_cls_pred = output[out_dict['cls_pred']][0] out_rpn_rois = output[out_dict['rpn_rois']][0] out_bbox_regr = output[out_dict['bbox_regr']][0] labels = out_cls_pred.argmax(axis=1) scores = out_cls_pred.max(axis=1) regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray()) labels.shape = labels.shape + (1, ) scores.shape = scores.shape + (1, ) coords_score_label = np.hstack((regressed_rois, scores, labels)) # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score for cls_j in range(1, globalvars['num_classes']): coords_score_label_for_cls = coords_score_label[np.where( coords_score_label[:, -1] == cls_j)] all_boxes[cls_j][ img_i] = coords_score_label_for_cls[:, :-1].astype(np.float32, copy=False) if (img_i + 1) % 100 == 0: print("Processed {} samples".format(img_i + 1)) # calculate mAP aps = evaluate_detections( all_boxes, all_gt_infos, classes, nms_threshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD, conf_threshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD) ap_list = [] for class_name in aps: ap_list += [aps[class_name]] print('AP for {:>15} = {:.4f}'.format(class_name, aps[class_name])) meanAP = np.nanmean(ap_list) print('Mean AP = {:.4f}'.format(meanAP)) return meanAP
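# regress_rois above applies the class-specific bounding-box deltas predicted by the
# network to the RPN proposals. Conceptually it is the standard R-CNN box
# parameterization; this simplified NumPy version is an assumed equivalent for
# illustration, not the library routine:
import numpy as np
def apply_bbox_deltas(rois, deltas):
    # rois: (N, 4) boxes as x1, y1, x2, y2; deltas: (N, 4) as dx, dy, dw, dh
    w = rois[:, 2] - rois[:, 0] + 1.0
    h = rois[:, 3] - rois[:, 1] + 1.0
    cx = rois[:, 0] + 0.5 * w + deltas[:, 0] * w   # shift the box center
    cy = rois[:, 1] + 0.5 * h + deltas[:, 1] * h
    w = w * np.exp(deltas[:, 2])                   # rescale width/height
    h = h * np.exp(deltas[:, 3])
    return np.stack([cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h], axis=1)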
def train_faster_rcnn_alternating(base_model_file_name, debug_output=False): ''' 4-Step Alternating Training scheme from the Faster R-CNN paper: # Create initial network, only rpn, without detection network # --> train only the rpn (and conv3_1 and up for VGG16) # buffer region proposals from rpn # Create full network, initialize conv layers with imagenet, use buffered proposals # --> train only detection network (and conv3_1 and up for VGG16) # Keep conv weights from detection network and fix them # --> train only rpn # buffer region proposals from rpn # Keep conv and rpn weights from step 3 and fix them # --> train only detection network ''' # Learning parameters rpn_lr_factor = globalvars['rpn_lr_factor'] rpn_lr_per_sample_scaled = [ x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE ] frcn_lr_factor = globalvars['frcn_lr_factor'] frcn_lr_per_sample_scaled = [ x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE ] l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT mm_schedule = momentum_schedule(globalvars['momentum_per_mb']) rpn_epochs = globalvars['rpn_epochs'] frcn_epochs = globalvars['frcn_epochs'] print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL)) print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled)) print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled)) if debug_output: print("Storing graphs and models to %s." % globalvars['output_path']) # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name) feat_norm = image_input - normalization_const roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) scaled_gt_boxes = alias(roi_input, name='roi_input') dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) dims_node = alias(dims_input, name='dims_input') rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()]) rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois') # base image classification model (e.g. VGG16 or AlexNet) base_model = load_model(base_model_file_name) print("stage 1a - rpn") if True: # Create initial network, only rpn, without detection network # initial weights train? # conv: base_model only conv3_1 and up # rpn: init new yes # frcn: - - # conv layers conv_layers = clone_conv_layers(base_model) conv_out = conv_layers(feat_norm) # RPN and losses rpn_rois, rpn_losses = create_rpn( conv_out, scaled_gt_boxes, dims_node, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS) stage1_rpn_network = combine([rpn_rois, rpn_losses]) # train if debug_output: plot( stage1_rpn_network, os.path.join( globalvars['output_path'], "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE)) train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses, rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs) print("stage 1a - buffering rpn proposals") buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input) print("stage 1b - frcn") if True: # Create full network, initialize conv layers with imagenet, fix rpn weights # initial weights train? 
# conv: base_model only conv3_1 and up # rpn: stage1a rpn model no --> use buffered proposals # frcn: base_model + new yes # conv_layers conv_layers = clone_conv_layers(base_model) conv_out = conv_layers(feat_norm) # use buffered proposals in target layer rois, label_targets, bbox_targets, bbox_inside_weights = \ create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, num_classes=globalvars['num_classes']) # Fast RCNN and losses fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], CloneMethod.clone) cls_score, bbox_pred = create_fast_rcnn_predictor( conv_out, rois, fc_layers) detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights) pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error") stage1_frcn_network = combine( [rois, cls_score, bbox_pred, detection_losses, pred_error]) # train if debug_output: plot( stage1_frcn_network, os.path.join( globalvars['output_path'], "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE)) train_model(image_input, roi_input, dims_input, detection_losses, pred_error, frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs, rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1) buffered_proposals_s1 = None print("stage 2a - rpn") if True: # Keep conv weights from detection network and fix them # initial weights train? # conv: stage1b frcn model no # rpn: stage1a rpn model yes # frcn: - - # conv_layers conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze) conv_out = conv_layers(image_input) # RPN and losses rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"], ["rpn_rois", "rpn_losses"], CloneMethod.clone) rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes) rpn_rois = rpn_net.outputs[0] rpn_losses = rpn_net.outputs[1] stage2_rpn_network = combine([rpn_rois, rpn_losses]) # train if debug_output: plot( stage2_rpn_network, os.path.join( globalvars['output_path'], "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE)) train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses, rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs) print("stage 2a - buffering rpn proposals") buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input) print("stage 2b - frcn") if True: # Keep conv and rpn weights from step 3 and fix them # initial weights train? # conv: stage2a rpn model no # rpn: stage2a rpn model no --> use buffered proposals # frcn: stage1b frcn model yes - # conv_layers conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze) conv_out = conv_layers(image_input) # Fast RCNN and losses frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"], [ "cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error" ], CloneMethod.clone) stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes) detection_losses = stage2_frcn_network.outputs[3] pred_error = stage2_frcn_network.outputs[4] # train if debug_output: plot( stage2_frcn_network, os.path.join( globalvars['output_path'], "graph_frcn_train_stage2b_frcn." 
+ cfg["CNTK"].GRAPH_TYPE)) train_model(image_input, roi_input, dims_input, detection_losses, pred_error, frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs, rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2) buffered_proposals_s2 = None return create_eval_model(stage2_frcn_network, image_input, dims_input, rpn_model=stage2_rpn_network)
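# The staged scheme above relies on CloneMethod semantics: freeze turns the cloned
# weights into Constants (not trained), clone copies them as fresh trainable
# Parameters, and share reuses the same Parameter objects. A minimal, self-contained
# illustration (the Dense layer is an arbitrary stand-in, not from the original):
import cntk as C
x = C.input_variable(4)
dense = C.layers.Dense(2)(x)
frozen = dense.clone(C.CloneMethod.freeze, {x: C.input_variable(4)})
assert len(frozen.parameters) == 0  # the weights became constants, so learners skip them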
def sequence_to_sequence_translator(debug_output=False): input_vocab_dim = 69 label_vocab_dim = 69 hidden_dim = 512 num_layers = 2 # Source and target inputs to the model batch_axis = Axis.default_batch_axis() input_seq_axis = Axis('inputAxis') label_seq_axis = Axis('labelAxis') input_dynamic_axes = [batch_axis, input_seq_axis] raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes) label_dynamic_axes = [batch_axis, label_seq_axis] raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes) # Instantiate the sequence to sequence translation model input_sequence = raw_input # Drop the sentence start token from the label, for decoder training label_sequence = slice(raw_labels, label_seq_axis, 1, 0) label_sentence_start = sequence.first(raw_labels) is_first_label = sequence.is_first(label_sequence) label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label) # Encoder encoder_outputH = stabilize(input_sequence) for i in range(0, num_layers): (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( encoder_outputH.output(), hidden_dim, hidden_dim, future_value, future_value) thought_vectorH = sequence.first(encoder_outputH) thought_vectorC = sequence.first(encoder_outputC) thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence) thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence) # Decoder decoder_history_from_ground_truth = label_sequence decoder_input = element_select( is_first_label, label_sentence_start_scattered, past_value(decoder_history_from_ground_truth)) decoder_outputH = stabilize(decoder_input) for i in range(0, num_layers): if (i > 0): recurrence_hookH = past_value recurrence_hookC = past_value else: isFirst = sequence.is_first(label_sequence) recurrence_hookH = lambda operand: element_select( isFirst, thought_vector_broadcastH, past_value(operand)) recurrence_hookC = lambda operand: element_select( isFirst, thought_vector_broadcastC, past_value(operand)) (decoder_outputH, decoder_outputC) = LSTMP_component_with_self_stabilization( decoder_outputH.output(), hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC) decoder_output = decoder_outputH decoder_dim = hidden_dim # Softmax output layer z = linear_layer(stabilize(decoder_output), label_vocab_dim) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # Instantiate the trainer object to drive the model training lr = 0.007 momentum_time_constant = 1100 momentum_per_sample = momentums_per_sample( math.exp(-1.0 / momentum_time_constant)) clipping_threshold_per_sample = 2.3 gradient_clipping_with_truncation = True trainer = Trainer(z, ce, errs, [ momentum_sgd(z.parameters(), lr, momentum_per_sample, clipping_threshold_per_sample, gradient_clipping_with_truncation) ]) rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'), StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1') ], 10000) features_si = mb_source[feature_stream_name] labels_si = mb_source[labels_stream_name] # Get minibatches of sequences to train with and perform model training minibatch_size = 72 training_progress_output_freq = 30 if debug_output:
training_progress_output_freq = training_progress_output_freq / 3 i = 0 while True: mb = mb_source.get_next_minibatch(minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = {raw_input: mb[features_si], raw_labels: mb[labels_si]} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) i += 1 rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.test.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) test_mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'), StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1') ], 10000, False) features_si = test_mb_source[feature_stream_name] labels_si = test_mb_source[labels_stream_name] # choose this to be big enough for the longest sentence train_minibatch_size = 1024 # Get minibatches of sequences to test and perform testing i = 0 total_error = 0.0 while True: mb = test_mb_source.get_next_minibatch(train_minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual # minibatch data to be tested with arguments = {raw_input: mb[features_si], raw_labels: mb[labels_si]} mb_error = trainer.test_minibatch(arguments) total_error += mb_error if debug_output: print("Minibatch {}, Error {} ".format(i, mb_error)) i += 1 # Average of evaluation errors of all test minibatches return total_error / i
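# Note: the test loop above weights every minibatch equally, so a smaller final
# minibatch is over-weighted in the returned average. A sample-weighted average
# (reusing the loop's names; a sketch only, attribute names assumed from the
# newer reader API) would accumulate:
# total_error += mb_error * mb[labels_si].num_samples
# total_samples += mb[labels_si].num_samples
# ... and finally: return total_error / total_samples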
def eval_and_plot_faster_rcnn(eval_model, num_images_to_plot, test_map_file, img_shape, results_base_path, feature_node_name, classes, drawUnregressedRois=False, drawNegativeRois=False, nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold = 0.8): # get image paths with open(test_map_file) as f: content = f.readlines() img_base_path = os.path.dirname(os.path.abspath(test_map_file)) img_file_names = [os.path.join(img_base_path, x.split('\t')[1]) for x in content] # prepare model image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name) dims_input = input_variable((1,6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input') frcn_eval = eval_model(image_input, dims_input) #dims_input_const = cntk.constant([image_width, image_height, image_width, image_height, image_width, image_height], (1, 6)) print("Plotting results from Faster R-CNN model for %s images." % num_images_to_plot) for i in range(0, num_images_to_plot): imgPath = img_file_names[i] # evaluate single image _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1]) dims_input = np.array(dims, dtype=np.float32) dims_input.shape = (1,) + dims_input.shape output = frcn_eval.eval({frcn_eval.arguments[0]: [cntk_img_input], frcn_eval.arguments[1]: dims_input}) out_dict = dict([(k.name, k) for k in output]) out_cls_pred = output[out_dict['cls_pred']][0] out_rpn_rois = output[out_dict['rpn_rois']][0] out_bbox_regr = output[out_dict['bbox_regr']][0] labels = out_cls_pred.argmax(axis=1) scores = out_cls_pred.max(axis=1).tolist() if drawUnregressedRois: # plot results without final regression imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois, img_shape[2], img_shape[1], classes, nmsKeepIndices=None, boDrawNegativeRois=drawNegativeRois, decisionThreshold=bgrPlotThreshold) imsave("{}/{}_{}".format(results_base_path, i, os.path.basename(imgPath)), imgDebug) # apply regression and nms to bbox coordinates regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims) nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores, nms_threshold=nmsThreshold, conf_threshold=nmsConfThreshold) # filtered_bboxes = regressed_rois[nmsKeepIndices] # # print(filtered_bboxes) # filtered_labels = labels[nmsKeepIndices] # filtered_scores = scores[nmsKeepIndices] img = visualizeResultsFaster(imgPath, labels, scores, regressed_rois, img_shape[2], img_shape[1], classes, nmsKeepIndices=nmsKeepIndices, boDrawNegativeRois=drawNegativeRois, decisionThreshold=bgrPlotThreshold) # img = visualizeResultsFaster(imgPath,filtered_lables, filtered_scores, regressed_filtered_bboxes, img_shape[2], img_shape[1], # classes, nmsKeepIndices=nmsKeepIndices, # boDrawNegativeRois=drawNegativeRois, # decisionThreshold=bgrPlotThreshold) imsave("{}/{}_regr_{}".format(results_base_path, i, os.path.basename(imgPath)), img)
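# apply_nms_to_single_image_results above performs greedy non-maximum suppression
# per class. The standard algorithm it is based on, as a self-contained NumPy
# sketch (an assumed equivalent, IoU on x1,y1,x2,y2 boxes):
import numpy as np
def nms(boxes, scores, iou_threshold=0.5):
    order = np.argsort(scores)[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_r = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        iou = inter / (area_i + area_r - inter)
        order = rest[iou <= iou_threshold]  # drop boxes overlapping the kept one
    return keep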
def sequence_to_sequence_translator(debug_output=False, run_test=False): input_vocab_dim = 69 label_vocab_dim = 69 # network complexity; initially low for faster testing hidden_dim = 256 num_layers = 1 # Source and target inputs to the model batch_axis = Axis.default_batch_axis() input_seq_axis = Axis('inputAxis') label_seq_axis = Axis('labelAxis') input_dynamic_axes = [batch_axis, input_seq_axis] raw_input = input_variable( shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input') label_dynamic_axes = [batch_axis, label_seq_axis] raw_labels = input_variable( shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes, name='raw_labels') # Instantiate the sequence to sequence translation model input_sequence = raw_input # Drop the sentence start token from the label, for decoder training label_sequence = slice(raw_labels, label_seq_axis, 1, 0) # <s> A B C </s> --> A B C </s> label_sentence_start = sequence.first(raw_labels) # <s> is_first_label = sequence.is_first(label_sequence) # <s> 0 0 0 ... label_sentence_start_scattered = sequence.scatter( label_sentence_start, is_first_label) # Encoder encoder_outputH = stabilize(input_sequence) for i in range(0, num_layers): (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value) thought_vectorH = sequence.first(encoder_outputH) thought_vectorC = sequence.first(encoder_outputC) thought_vector_broadcastH = sequence.broadcast_as( thought_vectorH, label_sequence) thought_vector_broadcastC = sequence.broadcast_as( thought_vectorC, label_sequence) # Decoder decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value( decoder_history_hook)) decoder_outputH = stabilize(decoder_input) for i in range(0, num_layers): if (i > 0): recurrence_hookH = past_value recurrence_hookC = past_value else: isFirst = sequence.is_first(label_sequence) recurrence_hookH = lambda operand: element_select( isFirst, thought_vector_broadcastH, past_value(operand)) recurrence_hookC = lambda operand: element_select( isFirst, thought_vector_broadcastC, past_value(operand)) (decoder_outputH, decoder_outputC) = LSTMP_component_with_self_stabilization( decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC) decoder_output = decoder_outputH # Softmax output layer z = linear_layer(stabilize(decoder_output), label_vocab_dim) # Criterion nodes ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # network output for decoder history net_output = hardmax(z) # make a clone of the graph where the ground truth is replaced by the network output ng = z.clone(CloneMethod.share, {decoder_history_hook.output : net_output.output}) # Instantiate the trainer object to drive the model training lr = 0.007 minibatch_size = 72 momentum_time_constant = 1100 m_schedule = momentum_schedule(momentum_time_constant) clipping_threshold_per_sample = 2.3 gradient_clipping_with_truncation = True trainer = Trainer(z, ce, errs, [momentum_sgd( z.parameters, lr, m_schedule, clipping_threshold_per_sample, gradient_clipping_with_truncation)]) # setup data rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf" train_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) valid_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tiny.ctf")
feature_stream_name = 'features' labels_stream_name = 'labels' # readers randomize_data = True if run_test: randomize_data = False # because we want to get an exact error train_reader = text_format_minibatch_source(train_path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'), StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1') ], randomize=randomize_data) features_si_tr = train_reader.stream_info(feature_stream_name) labels_si_tr = train_reader.stream_info(labels_stream_name) valid_reader = text_format_minibatch_source(valid_path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'), StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1') ], randomize=False) features_si_va = valid_reader.stream_info(feature_stream_name) labels_si_va = valid_reader.stream_info(labels_stream_name) # get the vocab for printing output sequences in plaintext rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.mapping" vocab_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) vocab = [w.strip() for w in open(vocab_path).readlines()] i2w = { i:ch for i,ch in enumerate(vocab) } # Get minibatches of sequences to train with and perform model training i = 0 mbs = 0 epoch_size = 908241 max_epochs = 10 training_progress_output_freq = 500 # make things more basic for running a quicker test if run_test: epoch_size = 5000 max_epochs = 1 training_progress_output_freq = 30 for epoch in range(max_epochs): loss_numer = 0 metric_numer = 0 denom = 0 while i < (epoch+1) * epoch_size: # get next minibatch of training data mb_train = train_reader.next_minibatch(minibatch_size) train_args = {'raw_input': mb_train[features_si_tr], 'raw_labels': mb_train[labels_si_tr]} trainer.train_minibatch(train_args) # collect epoch-wide stats samples = trainer.previous_minibatch_sample_count loss_numer += trainer.previous_minibatch_loss_average * samples metric_numer += trainer.previous_minibatch_evaluation_average * samples denom += samples # every N MBs evaluate on a test sequence to visually show how we're doing if mbs % training_progress_output_freq == 0: mb_valid = valid_reader.next_minibatch(minibatch_size) valid_args = {'raw_input': mb_valid[features_si_va], 'raw_labels': mb_valid[labels_si_va]} e = ng.eval(valid_args) print_sequences(e, i2w) print_training_progress(trainer, mbs, training_progress_output_freq) i += mb_train[labels_si_tr].num_samples mbs += 1 print("--- EPOCH %d DONE: loss = %f, errs = %f ---" % (epoch, loss_numer/denom, 100.0*(metric_numer/denom))) # now setup a test run rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.test.ctf" test_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) test_reader = text_format_minibatch_source(test_path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'), StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1') ], 10000, randomize=False) features_si_te = test_reader.stream_info(feature_stream_name) labels_si_te = test_reader.stream_info(labels_stream_name) test_minibatch_size = 1024 # Get minibatches of sequences to test and perform testing i = 0 total_error = 0.0 while True: mb = test_reader.next_minibatch(test_minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual # minibatch data to be tested with arguments = {raw_input: mb[features_si_te], raw_labels: mb[labels_si_te]} mb_error = trainer.test_minibatch(arguments) total_error += 
mb_error if debug_output: print("Minibatch {}, Error {} ".format(i, mb_error)) i += 1 # Average of evaluation errors of all test minibatches return total_error / i
def train_sequence_classifier(device): input_dim = 2000 cell_dim = 25 hidden_dim = 25 embedding_dim = 50 num_output_classes = 5 features = variable(shape=input_dim, is_sparse=True, name="features") classifier_output = LSTM_sequence_classifer_net(features, num_output_classes, embedding_dim, hidden_dim, cell_dim, device) label = variable(num_output_classes, dynamic_axes=[Axis.default_batch_axis()], name="labels") ce = cross_entropy_with_softmax(classifier_output, label) pe = classification_error(classifier_output, label) #TODO: add save and load module code lstm_net = combine([ce, pe, classifier_output], "classifier_model") rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) cm = create_text_mb_source(path, input_dim, num_output_classes, 0, True, False, "x", "y") stream_infos = cm.stream_infos() for si in stream_infos: if si.m_name == 'features': features_si = si elif si.m_name == 'labels': labels_si = si minibatch_size = 200 lr = learning_rates_per_sample(0.0005) trainer = Trainer(classifier_output, ce, pe, [sgdlearner(classifier_output.owner.parameters(), lr)]) freq = 1 i = 0 cntk_dev = cntk_device(device) while True: mb = cm.get_next_minibatch(minibatch_size, cntk_dev) if len(mb) == 0: break arguments = dict() arguments[features] = mb[features_si].m_data arguments[label] = mb[labels_si].m_data trainer.train_minibatch(arguments, cntk_dev) if i % freq == 0: training_loss = get_train_loss(trainer) eval_crit = get_train_eval_criterion(trainer) print( "Minibatch: {}, Train Loss: {}, Train Evaluation Criterion: {}" .format(i, training_loss, eval_crit)) i += 1
def train_sequence_classifier(debug_output=False): input_dim = 2000 cell_dim = 25 hidden_dim = 25 embedding_dim = 50 num_output_classes = 5 # Input variables denoting the features and label data features = input_variable(shape=input_dim, is_sparse=True) label = input_variable(num_output_classes, dynamic_axes=[Axis.default_batch_axis()]) # Instantiate the sequence classification model classifier_output = LSTM_sequence_classifer_net(features, num_output_classes, embedding_dim, hidden_dim, cell_dim) ce = cross_entropy_with_softmax(classifier_output, label) pe = classification_error(classifier_output, label) rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_dim, True, 'x'), StreamConfiguration(labels_stream_name, num_output_classes, False, 'y') ], 0) features_si = mb_source[features] labels_si = mb_source[label] # Instantiate the trainer object to drive the model training trainer = Trainer(classifier_output, ce, pe, [sgd(classifier_output.parameters(), lr=0.0005)]) # Get minibatches of sequences to train with and perform model training minibatch_size = 200 training_progress_output_freq = 10 i = 0 if debug_output: training_progress_output_freq = training_progress_output_freq / 3 while True: mb = mb_source.get_next_minibatch(minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = {features: mb[features_si], label: mb[labels_si]} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) i += 1 import copy evaluation_average = copy.copy( trainer.previous_minibatch_evaluation_average()) loss_average = copy.copy(trainer.previous_minibatch_loss_average()) return evaluation_average, loss_average
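# For reference, a layers-API sketch with the same shape as the external
# LSTM_sequence_classifer_net used above (embed -> LSTM -> last step -> dense);
# an assumed equivalent for illustration, not the original helper:
import cntk as C
from cntk.layers import Sequential, Embedding, Recurrence, LSTM, Dense
def lstm_sequence_classifier(num_classes, embedding_dim, hidden_dim):
    return Sequential([
        Embedding(embedding_dim),
        Recurrence(LSTM(hidden_dim)),
        C.sequence.last,          # keep only the final step of the sequence
        Dense(num_classes)
    ])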
def test_eval_sparse_dense(tmpdir, device_id): from cntk import Axis from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs from cntk.ops import input_variable, times input_vocab_dim = label_vocab_dim = 69 ctf_data = '''\ 0 |S0 3:1 |# <s> |S1 3:1 |# <s> 0 |S0 4:1 |# A |S1 32:1 |# ~AH 0 |S0 5:1 |# B |S1 36:1 |# ~B 0 |S0 4:1 |# A |S1 31:1 |# ~AE 0 |S0 7:1 |# D |S1 38:1 |# ~D 0 |S0 12:1 |# I |S1 47:1 |# ~IY 0 |S0 1:1 |# </s> |S1 1:1 |# </s> 2 |S0 60:1 |# <s> |S1 3:1 |# <s> 2 |S0 61:1 |# A |S1 32:1 |# ~AH ''' ctf_file = str(tmpdir / '2seqtest.txt') with open(ctf_file, 'w') as f: f.write(ctf_data) mbs = MinibatchSource(CTFDeserializer( ctf_file, StreamDefs(features=StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True), labels=StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True))), randomize=False, epoch_size=2) batch_axis = Axis.default_batch_axis() input_seq_axis = Axis('inputAxis') label_seq_axis = Axis('labelAxis') input_dynamic_axes = [batch_axis, input_seq_axis] raw_input = input_variable(shape=input_vocab_dim, dynamic_axes=input_dynamic_axes, name='raw_input', is_sparse=True) mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100, input_map={raw_input: mbs.streams.features}, device=cntk_device(device_id)) z = times(raw_input, np.eye(input_vocab_dim)) e_reader = z.eval(mb_valid, device=cntk_device(device_id)) # CSR with the raw_input encoding in ctf_data one_hot_data = [[3, 4, 5, 4, 7, 12, 1], [60, 61]] data = [ csr(np.eye(input_vocab_dim, dtype=np.float32)[d]) for d in one_hot_data ] e_csr = z.eval({raw_input: data}, device=cntk_device(device_id)) assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_csr)]) # One-hot with the raw_input encoding in ctf_data data = one_hot(one_hot_data, num_classes=input_vocab_dim, device=cntk_device(device_id)) e_hot = z.eval({raw_input: data}, device=cntk_device(device_id)) assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_hot)])
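# The test above asserts that the reader, CSR, and one-hot encodings of the same
# sequences evaluate identically. The CSR construction on its own, self-contained
# (the 4-token sequence is an arbitrary example):
import numpy as np
from scipy.sparse import csr_matrix
vocab = 69
tokens = [3, 4, 5, 1]
dense_rows = np.eye(vocab, dtype=np.float32)[tokens]  # one-hot rows
sparse_rows = csr_matrix(dense_rows)                  # same content, sparse storage
assert np.allclose(sparse_rows.toarray(), dense_rows)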
ix_to_char = { i:ch for i,ch in enumerate(chars) } minibatch_size=100 def sample(p): xi = [char_to_ix[ch] for ch in data[p:p+minibatch_size]] yi = [char_to_ix[ch] for ch in data[p+1:p+minibatch_size+1]] X = np.eye(vocab_size, dtype=np.float32)[xi] Y = np.eye(vocab_size, dtype=np.float32)[yi] return [X], [Y] sample(0) input_seq_axis = Axis('inputAxis') input_sequence = sequence.input_variable(shape=vocab_size, sequence_axis=input_seq_axis) label_sequence = sequence.input_variable(shape=vocab_size, sequence_axis=input_seq_axis) # model = Sequential([Dense(300),Dense(vocab_size)]) model = Sequential([ For(range(2), lambda: Sequential([Stabilizer(), Recurrence(LSTM(256), go_backwards=False)])), Dense(vocab_size)]) z = model(input_sequence) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence)
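# A minimal training-loop sketch for the character model above; the learning rate
# and the plain SGD learner are assumptions, not from the original:
import cntk as C
lr_schedule = C.learning_rate_schedule(0.001, C.UnitType.sample)
trainer = C.Trainer(z, (ce, errs), [C.sgd(z.parameters, lr_schedule)])
for p in range(0, len(data) - minibatch_size - 1, minibatch_size):
    X, Y = sample(p)  # one-hot (X, Y) pair as built by sample() above
    trainer.train_minibatch({input_sequence: X, label_sequence: Y})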
def compute_test_set_aps(eval_model, cfg): num_test_images = cfg["DATA"].NUM_TEST_IMAGES classes = cfg["DATA"].CLASSES image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH), dynamic_axes=[Axis.default_batch_axis()], name=cfg["MODEL"].FEATURE_NODE_NAME) roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4), dynamic_axes=[Axis.default_batch_axis()], name="roi_proposals") dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) frcn_eval = eval_model(image_input, roi_proposals) # Create the minibatch source if cfg.USE_PRECOMPUTED_PROPOSALS: try: cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE = os.path.join(cfg["DATA"].MAP_FILE_PATH, cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE) proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TEST_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS) except Exception: print("To use precomputed proposals please specify the following parameters in your configuration:\n" "__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n" "__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE") exit(-1) else: proposal_provider = ProposalProvider.fromconfig(cfg) minibatch_source = ObjectDetectionMinibatchSource( cfg["DATA"].TEST_MAP_FILE, cfg["DATA"].TEST_ROI_FILE, max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE, pad_width=cfg.IMAGE_WIDTH, pad_height=cfg.IMAGE_HEIGHT, pad_value=cfg["MODEL"].IMG_PAD_COLOR, randomize=False, use_flipping=False, max_images=cfg["DATA"].NUM_TEST_IMAGES, num_classes=cfg["DATA"].NUM_CLASSES, proposal_provider=proposal_provider, provide_targets=False) # define mapping from reader streams to network inputs input_map = { minibatch_source.image_si: image_input, minibatch_source.roi_si: roi_input, minibatch_source.proposals_si: roi_proposals, minibatch_source.dims_si: dims_input } # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in (x1, y1, x2, y2, score) all_boxes = [[[] for _ in range(num_test_images)] for _ in range(cfg["DATA"].NUM_CLASSES)] # evaluate test images and write network output to file print("Evaluating Fast R-CNN model for %s images." % num_test_images) all_gt_infos = {key: [] for key in classes} for img_i in range(0, num_test_images): mb_data = minibatch_source.next_minibatch(1, input_map=input_map) gt_row = mb_data[roi_input].asarray() gt_row = gt_row.reshape((cfg.INPUT_ROIS_PER_IMAGE, 5)) all_gt_boxes = gt_row[np.where(gt_row[:,-1] > 0)] for cls_index, cls_name in enumerate(classes): if cls_index == 0: continue cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:,-1] == cls_index)] all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes), 'difficult': [False] * len(cls_gt_boxes), 'det': [False] * len(cls_gt_boxes)}) output = frcn_eval.eval({image_input: mb_data[image_input], roi_proposals: mb_data[roi_proposals]}) out_dict = dict([(k.name, k) for k in output]) out_cls_pred = output[out_dict['cls_pred']][0] out_rpn_rois = mb_data[roi_proposals].data.asarray() out_bbox_regr = output[out_dict['bbox_regr']][0] labels = out_cls_pred.argmax(axis=1) scores = out_cls_pred.max(axis=1) regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray()) labels.shape = labels.shape + (1,) scores.shape = scores.shape + (1,) coords_score_label = np.hstack((regressed_rois, scores, labels)) # shape of all_boxes: e.g.
21 classes x 4952 images x 58 rois x 5 coords+score for cls_j in range(1, cfg["DATA"].NUM_CLASSES): coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:,-1] == cls_j)] all_boxes[cls_j][img_i] = coords_score_label_for_cls[:,:-1].astype(np.float32, copy=False) if (img_i+1) % 100 == 0: print("Processed {} samples".format(img_i+1)) # calculate mAP aps = evaluate_detections(all_boxes, all_gt_infos, classes, use_gpu_nms = cfg.USE_GPU_NMS, device_id = cfg.GPU_ID, nms_threshold=cfg.RESULTS_NMS_THRESHOLD, conf_threshold = cfg.RESULTS_NMS_CONF_THRESHOLD) return aps
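# Callers typically reduce the per-class AP dict returned above to a mean AP,
# mirroring the printout in eval_faster_rcnn_mAP; the class names and values
# below are made up for illustration:
import numpy as np
aps = {'car': 0.72, 'person': 0.65, 'plant': float('nan')}  # nan: class absent from the test set
print('Mean AP = {:.4f}'.format(np.nanmean(list(aps.values()))))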
break trainer.train_minibatch(data) loss_numer += trainer.previous_minibatch_loss_average() * trainer.previous_minibatch_sample_count() # too much code for something this simple loss_denom += trainer.previous_minibatch_sample_count() metric_numer += trainer.previous_minibatch_evaluation_average() * trainer.previous_minibatch_sample_count() metric_denom += trainer.previous_minibatch_sample_count() print_training_progress(trainer, mbs if mbs > 10 else 0, num_mbs_to_show_result) t += num_samples[slot_labels] #print (num_samples[slot_labels], t) mbs += 1 print("--- EPOCH {} DONE: loss = {:0.6f} * {}, metric = {:0.1f}% * {} ---".format(epoch+1, loss_numer/loss_denom, loss_denom, metric_numer/metric_denom*100.0, metric_denom)) return loss_numer/loss_denom, metric_numer/metric_denom ############################# # main function boilerplate # ############################# if __name__=='__main__': # TODO: get closure on Amit's feedback "Not the right pattern as we discussed over email. Please change to set_default_device(gpu(0))" #set_gpu(0) #set_computation_network_trace_level(1) # TODO: remove debugging facilities once this all works reader = create_reader(data_dir + "/atis.train.ctf") model = create_model(_inf=_Infer(shape=input_dim, axis=[Axis.default_batch_axis(), Axis.default_dynamic_axis()])) # TODO: Currently this fails with a mismatch error if axes ^^ are given in opposite order. I think it shouldn't. # train train(reader, model, max_epochs=8) # test (TODO) reader = create_reader(data_dir + "/atis.test.ctf") #test(reader, model_dir + "/slu.cmf") # TODO: what is the correct pattern here?
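# The TODO above refers to CNTK's device-selection API; the pattern it suggests
# would look like this (a sketch, not part of the original script):
# from cntk.device import try_set_default_device, gpu
# try_set_default_device(gpu(0))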
def compute_test_set_aps(eval_model, cfg): num_test_images = cfg["DATA"].NUM_TEST_IMAGES classes = cfg["DATA"].CLASSES image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH), dynamic_axes=[Axis.default_batch_axis()], name=cfg["MODEL"].FEATURE_NODE_NAME) roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) frcn_eval = eval_model(image_input, dims_input) # Create the minibatch source minibatch_source = ObjectDetectionMinibatchSource( cfg["DATA"].TEST_MAP_FILE, cfg["DATA"].TEST_ROI_FILE, max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE, pad_width=cfg.IMAGE_WIDTH, pad_height=cfg.IMAGE_HEIGHT, pad_value=cfg["MODEL"].IMG_PAD_COLOR, randomize=False, use_flipping=False, max_images=cfg["DATA"].NUM_TEST_IMAGES, num_classes=cfg["DATA"].NUM_CLASSES, proposal_provider=None) # define mapping from reader streams to network inputs input_map = { minibatch_source.image_si: image_input, minibatch_source.roi_si: roi_input, minibatch_source.dims_si: dims_input } # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in (x1, y1, x2, y2, score) all_boxes = [[[] for _ in range(num_test_images)] for _ in range(cfg["DATA"].NUM_CLASSES)] # evaluate test images and write network output to file print("Evaluating Faster R-CNN model for %s images." % num_test_images) all_gt_infos = {key: [] for key in classes} for img_i in range(0, num_test_images): mb_data = minibatch_source.next_minibatch(1, input_map=input_map) gt_row = mb_data[roi_input].asarray() gt_row = gt_row.reshape((cfg.INPUT_ROIS_PER_IMAGE, 5)) all_gt_boxes = gt_row[np.where(gt_row[:, -1] > 0)] for cls_index, cls_name in enumerate(classes): if cls_index == 0: continue cls_gt_boxes = all_gt_boxes[np.where( all_gt_boxes[:, -1] == cls_index)] all_gt_infos[cls_name].append({ 'bbox': np.array(cls_gt_boxes), 'difficult': [False] * len(cls_gt_boxes), 'det': [False] * len(cls_gt_boxes) }) output = frcn_eval.eval({ image_input: mb_data[image_input], dims_input: mb_data[dims_input] }) out_dict = dict([(k.name, k) for k in output]) out_cls_pred = output[out_dict['cls_pred']][0] out_rpn_rois = output[out_dict['rpn_rois']][0] out_bbox_regr = output[out_dict['bbox_regr']][0] labels = out_cls_pred.argmax(axis=1) scores = out_cls_pred.max(axis=1) regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray()) labels.shape = labels.shape + (1, ) scores.shape = scores.shape + (1, ) coords_score_label = np.hstack((regressed_rois, scores, labels)) # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score for cls_j in range(1, cfg["DATA"].NUM_CLASSES): coords_score_label_for_cls = coords_score_label[np.where( coords_score_label[:, -1] == cls_j)] all_boxes[cls_j][ img_i] = coords_score_label_for_cls[:, :-1].astype(np.float32, copy=False) if (img_i + 1) % 100 == 0: print("Processed {} samples".format(img_i + 1)) # calculate mAP aps = evaluate_detections(all_boxes, all_gt_infos, classes, use_gpu_nms=cfg.USE_GPU_NMS, device_id=cfg.GPU_ID, nms_threshold=cfg.RESULTS_NMS_THRESHOLD, conf_threshold=cfg.RESULTS_NMS_CONF_THRESHOLD) return aps
def train_fast_rcnn(cfg):
    # Train only if no model exists yet
    model_path = cfg['MODEL_PATH']
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)
    else:
        # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
        image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                     dynamic_axes=[Axis.default_batch_axis()],
                                     name=cfg["MODEL"].FEATURE_NODE_NAME)
        roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4),
                                       dynamic_axes=[Axis.default_batch_axis()],
                                       name="roi_proposals")
        label_targets = input_variable((cfg.NUM_ROI_PROPOSALS, cfg["DATA"].NUM_CLASSES),
                                       dynamic_axes=[Axis.default_batch_axis()])
        bbox_targets = input_variable((cfg.NUM_ROI_PROPOSALS, 4 * cfg["DATA"].NUM_CLASSES),
                                      dynamic_axes=[Axis.default_batch_axis()])
        bbox_inside_weights = input_variable((cfg.NUM_ROI_PROPOSALS, 4 * cfg["DATA"].NUM_CLASSES),
                                             dynamic_axes=[Axis.default_batch_axis()])

        # Instantiate the Fast R-CNN prediction model and loss function
        loss, pred_error = create_fast_rcnn_model(image_input, roi_proposals, label_targets,
                                                  bbox_targets, bbox_inside_weights, cfg)
        if isinstance(loss, cntk.Variable):
            loss = combine([loss])

        if cfg["CNTK"].DEBUG_OUTPUT:
            print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)
            plot(loss, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train." + cfg["CNTK"].GRAPH_TYPE))

        # Set learning parameters
        lr_factor = cfg["CNTK"].LR_FACTOR
        lr_per_sample_scaled = [x * lr_factor for x in cfg["CNTK"].LR_PER_SAMPLE]
        mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
        l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
        epochs_to_train = cfg["CNTK"].MAX_EPOCHS

        print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
        print("lr_per_sample: {}".format(lr_per_sample_scaled))

        # --- train ---
        # Instantiate the learners and the trainer object
        params = loss.parameters
        biases = [p for p in params if '.b' in p.name or 'b' == p.name]
        others = [p for p in params if p not in biases]
        bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

        lr_schedule = learning_rate_schedule(lr_per_sample_scaled, unit=UnitType.sample)
        learner = momentum_sgd(others, lr_schedule, mm_schedule,
                               l2_regularization_weight=l2_reg_weight,
                               unit_gain=False, use_mean_gradient=True)

        bias_lr_per_sample = [v * bias_lr_mult for v in cfg["CNTK"].LR_PER_SAMPLE]
        bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
        bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule,
                                    l2_regularization_weight=l2_reg_weight,
                                    unit_gain=False, use_mean_gradient=True)
        trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

        # Get minibatches of images and perform model training
        print("Training model for %s epochs." % epochs_to_train)
        log_number_of_parameters(loss)

        # Create the minibatch source
        if cfg.USE_PRECOMPUTED_PROPOSALS:
            proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE,
                                                          cfg.NUM_ROI_PROPOSALS)
        else:
            proposal_provider = ProposalProvider.fromconfig(cfg)

        od_minibatch_source = ObjectDetectionMinibatchSource(
            cfg["DATA"].TRAIN_MAP_FILE,
            cfg["DATA"].TRAIN_ROI_FILE,
            max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
            pad_width=cfg.IMAGE_WIDTH,
            pad_height=cfg.IMAGE_HEIGHT,
            pad_value=cfg["MODEL"].IMG_PAD_COLOR,
            randomize=True,
            use_flipping=cfg["TRAIN"].USE_FLIPPED,
            max_images=cfg["DATA"].NUM_TRAIN_IMAGES,
            num_classes=cfg["DATA"].NUM_CLASSES,
            proposal_provider=proposal_provider,
            provide_targets=True,
            proposal_iou_threshold=cfg.BBOX_THRESH,
            normalize_means=None if not cfg.BBOX_NORMALIZE_TARGETS else cfg.BBOX_NORMALIZE_MEANS,
            normalize_stds=None if not cfg.BBOX_NORMALIZE_TARGETS else cfg.BBOX_NORMALIZE_STDS)

        # define mapping from reader streams to network inputs
        input_map = {
            od_minibatch_source.image_si: image_input,
            od_minibatch_source.proposals_si: roi_proposals,
            od_minibatch_source.label_targets_si: label_targets,
            od_minibatch_source.bbox_targets_si: bbox_targets,
            od_minibatch_source.bbiw_si: bbox_inside_weights
        }

        progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
        for epoch in range(epochs_to_train):  # loop over epochs
            sample_count = 0
            while sample_count < cfg["DATA"].NUM_TRAIN_IMAGES:  # loop over minibatches in the epoch
                data = od_minibatch_source.next_minibatch(
                    min(cfg.MB_SIZE, cfg["DATA"].NUM_TRAIN_IMAGES - sample_count),
                    input_map=input_map)
                trainer.train_minibatch(data)                                    # update model with it
                sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
                progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
                if sample_count % 100 == 0:
                    print("Processed {} samples".format(sample_count))

            progress_printer.epoch_summary(with_metric=True)

        eval_model = create_fast_rcnn_eval_model(loss, image_input, roi_proposals, cfg)
        eval_model.save(cfg['MODEL_PATH'])
        return eval_model
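# Usage sketch for train_fast_rcnn. `get_configuration()` is a hypothetical
# helper standing in for whatever assembles the cfg object in the surrounding
# project; only MODEL_PATH is read here, purely for illustration.
cfg = get_configuration()  # hypothetical config builder, not defined in this file
trained_model = train_fast_rcnn(cfg)
print("Evaluation model written to %s" % cfg['MODEL_PATH'])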
def eval_faster_rcnn(eval_model, imgPath, img_shape,
                     results_base_path, feature_node_name, classes, mode,
                     drawUnregressedRois=False, drawNegativeRois=False,
                     nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold=0.8):
    # prepare model
    image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
    dims_input = input_variable((1, 6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input')
    frcn_eval = eval_model(image_input, dims_input)

    # dims_input_const = cntk.constant([image_width, image_height, image_width, image_height, image_width, image_height], (1, 6))
    print("Plotting results from Faster R-CNN model for image.")

    # evaluate single image
    _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1])

    dims_input = np.array(dims, dtype=np.float32)
    dims_input.shape = (1,) + dims_input.shape
    output = frcn_eval.eval({frcn_eval.arguments[0]: [cntk_img_input],
                             frcn_eval.arguments[1]: dims_input})

    out_dict = dict([(k.name, k) for k in output])
    out_cls_pred = output[out_dict['cls_pred']][0]
    out_rpn_rois = output[out_dict['rpn_rois']][0]
    out_bbox_regr = output[out_dict['bbox_regr']][0]

    labels = out_cls_pred.argmax(axis=1)
    scores = out_cls_pred.max(axis=1).tolist()

    if mode == "returntags":
        class Tag(object):
            def __init__(self, label, score, bbox):
                self.label = label
                self.score = score
                self.bbox = bbox

            def serialize(self):
                return {
                    'label': self.label,
                    'score': self.score,
                    'bbox': self.bbox,
                }

        # apply regression and nms, then return the surviving detections as tags
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
        nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
                                                           nms_threshold=nmsThreshold,
                                                           conf_threshold=nmsConfThreshold)
        results = []
        for i in nmsKeepIndices:
            if labels[i] != 0:
                results.append(Tag(str(classes[labels[i]]), str(scores[i]), str(regressed_rois[i])))

        return results

    elif mode == "returnimage":
        evaluated_image_path = "{}/{}".format(results_base_path, 'evaluated_' + os.path.basename(imgPath))
        if drawUnregressedRois:
            # plot results without final regression
            imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois,
                                              img_shape[2], img_shape[1], classes,
                                              nmsKeepIndices=None,
                                              boDrawNegativeRois=drawNegativeRois,
                                              decisionThreshold=bgrPlotThreshold)
            imsave(evaluated_image_path, imgDebug)
            return evaluated_image_path

        # apply regression and nms to bbox coordinates
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
        nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
                                                           nms_threshold=nmsThreshold,
                                                           conf_threshold=nmsConfThreshold)
        img, allboxes = visualizeResultsFaster(imgPath, labels, scores, regressed_rois,
                                               img_shape[2], img_shape[1], classes,
                                               nmsKeepIndices=nmsKeepIndices,
                                               boDrawNegativeRois=drawNegativeRois,
                                               decisionThreshold=bgrPlotThreshold)
        # imsave(evaluated_image_path, img)

        allboxes = np.array(allboxes)
        # perform non-maximum suppression on the bounding boxes
        pick = non_max_suppression_fast(allboxes, 0.6)
        # print("[x] after applying non-maximum, %d bounding boxes" % (len(pick)))

        # loop over the picked bounding boxes and copy each box onto a black background
        black_bg = np.zeros_like(img)
        for (startX, startY, endX, endY) in pick:
            roi = img[startY:endY, startX:endX]
            black_bg[startY:endY, startX:endX] = roi

        # mask by hue so that only the detected objects remain
        result = black_bg.copy()
        image = cv2.cvtColor(black_bg, cv2.COLOR_RGB2HSV)
        lower = np.array([18, 0, 0])
        upper = np.array([179, 255, 255])
        mask = cv2.inRange(image, lower, upper)
        result = cv2.bitwise_and(result, result, mask=mask)

        lengthThroughRotatedRectangle = []
        lengthThroughManualCalculation = []

        # length calculation through rotated rectangle
        image = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
        ret, thresh = cv2.threshold(image, 127, 255, 0)
        working_image = thresh.copy()
        result_img = thresh.copy()
        working_image[:, :] = 0
        result_img[:, :] = 0
        kernel = np.ones((7, 7), np.uint8)
        for (startX, startY, endX, endY) in pick:
            cord = (int(startX), int(startY), int(endX), int(endY))
            working_image[:, :] = 0
            working_image[cord[1]:cord[3], cord[0]:cord[2]] = thresh[cord[1]:cord[3], cord[0]:cord[2]]
            result_img[cord[1]:cord[3], cord[0]:cord[2]] = thresh[cord[1]:cord[3], cord[0]:cord[2]]
            working_image = cv2.morphologyEx(working_image, cv2.MORPH_OPEN, kernel)
            contours, hierarchy = cv2.findContours(working_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            if len(contours) > 0:
                c = max(contours, key=cv2.contourArea)
                rect = cv2.minAreaRect(c)
                (x, y), (width, height), angle = rect
                height = int(height)
                width = int(width)
                if width > height:
                    height = width
                lengthThroughRotatedRectangle.append(height)

        # length calculation through manual row counting
        for (startX, startY, endX, endY) in pick:
            cord = (int(startX), int(startY), int(endX), int(endY))
            widthofRectangle = cord[2] - cord[0]
            NonZeroPixels = []
            threshold = 0.20
            height = 0
            width = 0
            for i in range(cord[1], cord[3]):
                row = thresh[i, cord[0]:cord[2]]
                NonZeroPixelsInRow = np.count_nonzero(row)
                WidthRatioInRow = NonZeroPixelsInRow / widthofRectangle
                if WidthRatioInRow > threshold:
                    height = height + 1
                    NonZeroPixels.append(NonZeroPixelsInRow)
            width = round(sum(NonZeroPixels) / len(NonZeroPixels)) if NonZeroPixels else 0
            if width > height:
                height = width
            lengthThroughManualCalculation.append(height)

        # print("length through rotatedRectangle\n", lengthThroughRotatedRectangle)
        # print("length through ManualCalculation\n", lengthThroughManualCalculation)

        # average the two estimates, then scale pixel lengths to centimeters
        SpikesLength = [int((a + b) / 2) for a, b in
                        zip(lengthThroughRotatedRectangle, lengthThroughManualCalculation)]
        # print("spike length\n", SpikesLength)
        SpikesLength = [i * 0.1 for i in SpikesLength]
        print("Spike Length in cm", SpikesLength)
        # imsave(evaluated_image_path, thresh)
        return SpikesLength
    else:
        raise ValueError("Unsupported value found in 'mode' parameter")
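# `non_max_suppression_fast` is imported from elsewhere in this project; below
# is a minimal sketch of the widely used Malisiewicz-style greedy NMS that
# matches how it is called above (takes an N x 4 box array, returns the
# surviving boxes as integer coordinates). The exact implementation used by
# the project may differ.
import numpy as np

def non_max_suppression_fast(boxes, overlap_thresh):
    if len(boxes) == 0:
        return []
    boxes = boxes.astype("float")
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)  # process boxes by bottom coordinate
    pick = []
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        # intersection of the remaining boxes with box i
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[idxs[:last]]
        # drop box i and everything overlapping it beyond the threshold
        idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlap_thresh)[0])))
    return boxes[pick].astype("int")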
def eval_faster_rcnn(eval_model, imgPath, img_shape,
                     results_base_path, feature_node_name, classes, mode,
                     drawUnregressedRois=False, drawNegativeRois=False,
                     nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold=0.8):
    # prepare model
    image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
    dims_input = input_variable((1, 6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input')
    frcn_eval = eval_model(image_input, dims_input)

    # dims_input_const = cntk.constant([image_width, image_height, image_width, image_height, image_width, image_height], (1, 6))
    print("Plotting results from Faster R-CNN model for image.")

    # evaluate single image
    _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1])

    dims_input = np.array(dims, dtype=np.float32)
    dims_input.shape = (1,) + dims_input.shape
    output = frcn_eval.eval({frcn_eval.arguments[0]: [cntk_img_input],
                             frcn_eval.arguments[1]: dims_input})

    out_dict = dict([(k.name, k) for k in output])
    out_cls_pred = output[out_dict['cls_pred']][0]
    out_rpn_rois = output[out_dict['rpn_rois']][0]
    out_bbox_regr = output[out_dict['bbox_regr']][0]

    labels = out_cls_pred.argmax(axis=1)
    scores = out_cls_pred.max(axis=1).tolist()

    if mode == "returntags":
        class Tag(object):
            def __init__(self, label, score, bbox):
                self.label = label
                self.score = score
                self.bbox = bbox

            def serialize(self):
                return {
                    'label': self.label,
                    'score': self.score,
                    'bbox': self.bbox,
                }

        results = []
        for i in range(len(out_rpn_rois)):
            if labels[i] != 0:
                x = Tag(str(classes[labels[i]]), str(scores[i]), str(out_rpn_rois[i]))
                results.append(x)

        return results

    elif mode == "returnimage":
        evaluated_image_path = "{}/{}".format(results_base_path, 'evaluated_' + os.path.basename(imgPath))
        if drawUnregressedRois:
            # plot results without final regression
            imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois,
                                              img_shape[2], img_shape[1], classes,
                                              nmsKeepIndices=None,
                                              boDrawNegativeRois=drawNegativeRois,
                                              decisionThreshold=bgrPlotThreshold)
            imsave(evaluated_image_path, imgDebug)
        else:
            # apply regression and nms to bbox coordinates
            regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
            nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
                                                               nms_threshold=nmsThreshold,
                                                               conf_threshold=nmsConfThreshold)
            img = visualizeResultsFaster(imgPath, labels, scores, regressed_rois,
                                         img_shape[2], img_shape[1], classes,
                                         nmsKeepIndices=nmsKeepIndices,
                                         boDrawNegativeRois=drawNegativeRois,
                                         decisionThreshold=bgrPlotThreshold)
            imsave(evaluated_image_path, img)

        return evaluated_image_path
    else:
        raise ValueError("Unsupported value found in 'mode' parameter")
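# Usage sketch for the variant above. The model path, image path, class list,
# image size, and feature node name are all illustrative assumptions; in the
# real pipeline they come from the configuration.
eval_model = load_model("faster_rcnn_eval.model")  # path is an assumption
result_path = eval_faster_rcnn(eval_model, "Temp/test_img.jpg", (3, 850, 850),
                               "Output", "features", ["__background__", "spike"],
                               mode="returnimage", nmsThreshold=0.5)
print("Annotated image written to %s" % result_path)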
# Train data reader
train_reader = create_reader(train_file, True)

# Validation/Test data reader
valid_reader = create_reader(valid_file, False)

model_dir = "."  # we downloaded our data to the local directory above # TODO check me

# model dimensions
input_vocab_dim = input_vocab_size
label_vocab_dim = label_vocab_size
hidden_dim = 128
num_layers = 1

# Source and target inputs to the model
batch_axis = Axis.default_batch_axis()
input_seq_axis = Axis('inputAxis')
label_seq_axis = Axis('labelAxis')

input_dynamic_axes = [batch_axis, input_seq_axis]
raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input')

label_dynamic_axes = [batch_axis, label_seq_axis]
raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes, name='raw_labels')

# Instantiate the sequence to sequence translation model
input_sequence = raw_input
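# A minimal sketch of how the readers above are bound to the two inputs and
# pulled for one minibatch, mirroring the train_bind pattern used later in
# this file (stream names `features`/`labels` follow the CNTK text-format
# reader convention and are an assumption here):
train_bind = {
    raw_input: train_reader.streams.features,
    raw_labels: train_reader.streams.labels
}
mb_train = train_reader.next_minibatch(72, input_map=train_bind)
print("minibatch holds {} label samples".format(mb_train[raw_labels].num_samples))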
def sequence_to_sequence_translator(debug_output=False, run_test=False):
    input_vocab_dim = 69
    label_vocab_dim = 69

    # network complexity; initially low for faster testing
    hidden_dim = 256
    num_layers = 1

    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input')

    label_dynamic_axes = [batch_axis, label_seq_axis]
    raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes, name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = sequence.slice(raw_labels, 1, 0)  # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)  # <s>

    is_first_label = sequence.is_first(label_sequence)  # <s> 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence)

    # Decoder
    decoder_history_hook = alias(label_sequence, name='decoder_history_hook')  # copy label_sequence

    decoder_input = element_select(is_first_label, label_sentence_start_scattered,
                                   past_value(decoder_history_hook))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(
                isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(
                isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH, decoder_outputC) = LSTMP_component_with_self_stabilization(
            decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)

    decoder_output = decoder_outputH

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)

    # Criterion nodes
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # network output for decoder history
    net_output = hardmax(z)

    # make a clone of the graph where the ground truth is replaced by the network output
    ng = z.clone(CloneMethod.share, {decoder_history_hook.output: net_output.output})

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters,
                           lr_per_minibatch, momentum_time_constant,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (ce, errs), learner)

    # setup data
    train_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "Data",
                              "cmudict-0.7b.train-dev-20-21.ctf")
    valid_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "Data", "tiny.ctf")

    # readers
    randomize_data = True
    if run_test:
        randomize_data = False  # because we want to get an exact error

    train_reader = create_reader(train_path, randomize_data, input_vocab_dim, label_vocab_dim)
    train_bind = {
        raw_input: train_reader.streams.features,
        raw_labels: train_reader.streams.labels
    }

    # get the vocab for printing output sequences in plaintext
    vocab_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "Data",
                              "cmudict-0.7b.mapping")
    vocab = [w.strip() for w in open(vocab_path).readlines()]
    i2w = {i: ch for i, ch in enumerate(vocab)}

    # Get minibatches of sequences to train with and perform model training
    i = 0
    mbs = 0
    minibatch_size = 72
    epoch_size = 908241
    max_epochs = 10
    training_progress_output_freq = 500

    # make things more basic for running a quicker test
    if run_test:
        epoch_size = 5000
        max_epochs = 1
        training_progress_output_freq = 30

    valid_reader = create_reader(valid_path, False, input_vocab_dim, label_vocab_dim)
    valid_bind = {
        find_arg_by_name('raw_input', ng): valid_reader.streams.features,
        find_arg_by_name('raw_labels', ng): valid_reader.streams.labels
    }

    for epoch in range(max_epochs):
        loss_numer = 0
        metric_numer = 0
        denom = 0

        while i < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size, input_map=train_bind)
            trainer.train_minibatch(mb_train)

            # collect epoch-wide stats
            samples = trainer.previous_minibatch_sample_count
            loss_numer += trainer.previous_minibatch_loss_average * samples
            metric_numer += trainer.previous_minibatch_evaluation_average * samples
            denom += samples

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % training_progress_output_freq == 0:
                mb_valid = valid_reader.next_minibatch(minibatch_size, input_map=valid_bind)
                e = ng.eval(mb_valid)
                print_sequences(e, i2w)

            print_training_progress(trainer, mbs, training_progress_output_freq)
            i += mb_train[raw_labels].num_samples
            mbs += 1

        print("--- EPOCH %d DONE: loss = %f, errs = %f ---" %
              (epoch, loss_numer / denom, 100.0 * (metric_numer / denom)))

    error1 = translator_test_error(z, trainer, input_vocab_dim, label_vocab_dim)

    z.save("seq2seq.dnn")
    z.restore("seq2seq.dnn")

    label_seq_axis = Axis('labelAxis')
    label_sequence = sequence.slice(find_arg_by_name('raw_labels', z), 1, 0)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)
    trainer = Trainer(z, (ce, errs), [momentum_sgd(
        z.parameters, lr_per_minibatch, momentum_time_constant,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)])

    error2 = translator_test_error(z, trainer, input_vocab_dim, label_vocab_dim)
    assert error1 == error2

    return error1
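# `find_arg_by_name` is used above but not defined in this snippet; a minimal
# sketch consistent with that usage (assumption: each argument name occurs
# exactly once in the graph):
def find_arg_by_name(name, expression):
    vars = [i for i in expression.arguments if i.name == name]
    assert len(vars) == 1
    return vars[0]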
def create_model():
    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input')

    label_dynamic_axes = [batch_axis, label_seq_axis]
    raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes, name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = sequence.slice(raw_labels, 1, 0, name='label_sequence')  # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)                         # <s>

    # Setup primer for decoder
    is_first_label = sequence.is_first(label_sequence)  # 1 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label)

    # Encoder
    stabilize = Stabilizer()
    encoder_output_h = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_output_h, encoder_output_c) = LSTM_layer(
            encoder_output_h.output, hidden_dim, future_value, future_value)

    # Prepare encoder output to be used in decoder
    thought_vector_h = sequence.first(encoder_output_h)
    thought_vector_c = sequence.first(encoder_output_c)

    thought_vector_broadcast_h = sequence.broadcast_as(thought_vector_h, label_sequence)
    thought_vector_broadcast_c = sequence.broadcast_as(thought_vector_c, label_sequence)

    # Decoder
    decoder_history_hook = alias(label_sequence, name='decoder_history_hook')  # copy label_sequence

    decoder_input = element_select(is_first_label, label_sentence_start_scattered,
                                   past_value(decoder_history_hook))

    decoder_output_h = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hook_h = past_value
            recurrence_hook_c = past_value
        else:
            recurrence_hook_h = lambda operand: element_select(
                is_first_label, thought_vector_broadcast_h, past_value(operand))
            recurrence_hook_c = lambda operand: element_select(
                is_first_label, thought_vector_broadcast_c, past_value(operand))

        (decoder_output_h, decoder_output_c) = LSTM_layer(
            decoder_output_h.output, hidden_dim, recurrence_hook_h, recurrence_hook_c)

    # Linear output layer
    W = parameter(shape=(decoder_output_h.shape[0], label_vocab_dim), init=glorot_uniform())
    B = parameter(shape=(label_vocab_dim), init=0)
    z = plus(B, times(stabilize(decoder_output_h), W))

    return z
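# Usage sketch: wire the graph returned by create_model() into criterion nodes,
# mirroring the pattern used in sequence_to_sequence_translator above. The
# lookup relies on the 'label_sequence' name assigned inside create_model();
# this wiring is an assumption, not part of the original snippet.
model = create_model()
label_sequence = model.find_by_name('label_sequence')
ce = cross_entropy_with_softmax(model, label_sequence)
errs = classification_error(model, label_sequence)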
def create_reader(path, is_training):
    return MinibatchSource(
        CTFDeserializer(path, StreamDefs(
            features=StreamDef(field="S0", shape=input_vocab_dim, is_sparse=True),
            labels=StreamDef(field="S1", shape=label_vocab_dim, is_sparse=True),
        )),
        randomize=is_training,
        max_sweeps=INFINITELY_REPEAT if is_training else 1,
    )

########################
#   define the model   #
########################

# type annotations for the two sequence types; later use InputSequence[Tensor[input_vocab_dim]]
# CNTK considers these two different types since they run over different sequence indices.
inputAxis = Axis("inputAxis")
labelAxis = Axis("labelAxis")
InputSequence = SequenceOver[inputAxis]
LabelSequence = SequenceOver[labelAxis]

# create the s2s model
def create_model():  # :: (history*, input*) -> logP(w)*
    # Embedding: (input*) --> embedded_input*
    # Right now assumes shared embedding and shared vocab size.
    embed = Embedding(embedding_dim, name='embed') if use_embedding else identity

    # Encoder: (input*) --> (h0, c0)
    # Create multiple layers of LSTMs by passing the output of the i-th layer
    # to the (i+1)th layer as its input
    # This is the plain s2s encoder. The attention encoder will keep the entire sequence instead.
    # Note: We go_backwards for the plain model, but forward for the attention model.
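# The snippet above breaks off inside create_model(); a hedged sketch of how
# the plain (non-attention) encoder described in those comments could continue,
# using the cntk.layers API. `embed`, `num_layers`, and `hidden_dim` are taken
# from the enclosing scope; the exact layer composition is an assumption.
from cntk.layers import Sequential, Stabilizer, For, Recurrence, LSTM, Fold, Label

def create_encoder_sketch():
    return Sequential([
        embed,
        Stabilizer(),
        # all but the last layer return full sequences, read backwards
        For(range(num_layers - 1), lambda: Recurrence(LSTM(hidden_dim), go_backwards=True)),
        # the last layer folds the sequence into its final (h, c) state
        Fold(LSTM(hidden_dim), go_backwards=True, return_full_state=True),
        (Label('encoded_h'), Label('encoded_c')),
    ])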
def train_fast_rcnn(cfg):
    # Train only if no model exists yet
    model_path = cfg['MODEL_PATH']
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)
    else:
        # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
        image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                     dynamic_axes=[Axis.default_batch_axis()],
                                     name=cfg["MODEL"].FEATURE_NODE_NAME)
        roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4),
                                       dynamic_axes=[Axis.default_batch_axis()],
                                       name="roi_proposals")
        label_targets = input_variable((cfg.NUM_ROI_PROPOSALS, cfg["DATA"].NUM_CLASSES),
                                       dynamic_axes=[Axis.default_batch_axis()])
        bbox_targets = input_variable((cfg.NUM_ROI_PROPOSALS, 4 * cfg["DATA"].NUM_CLASSES),
                                      dynamic_axes=[Axis.default_batch_axis()])
        bbox_inside_weights = input_variable((cfg.NUM_ROI_PROPOSALS, 4 * cfg["DATA"].NUM_CLASSES),
                                             dynamic_axes=[Axis.default_batch_axis()])

        # Instantiate the Fast R-CNN prediction model and loss function
        loss, pred_error = create_fast_rcnn_model(image_input, roi_proposals, label_targets,
                                                  bbox_targets, bbox_inside_weights, cfg)
        if isinstance(loss, cntk.Variable):
            loss = combine([loss])

        if cfg["CNTK"].DEBUG_OUTPUT:
            print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)
            plot(loss, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train." + cfg["CNTK"].GRAPH_TYPE))

        # Set learning parameters
        lr_factor = cfg["CNTK"].LR_FACTOR
        lr_per_sample_scaled = [x * lr_factor for x in cfg["CNTK"].LR_PER_SAMPLE]
        mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
        l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
        epochs_to_train = cfg["CNTK"].MAX_EPOCHS

        print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
        print("lr_per_sample: {}".format(lr_per_sample_scaled))

        # --- train ---
        # Instantiate the learners and the trainer object
        params = loss.parameters
        biases = [p for p in params if '.b' in p.name or 'b' == p.name]
        others = [p for p in params if p not in biases]
        bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

        lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample_scaled)
        learner = momentum_sgd(others, lr_schedule, mm_schedule,
                               l2_regularization_weight=l2_reg_weight,
                               unit_gain=False, use_mean_gradient=True)

        bias_lr_per_sample = [v * bias_lr_mult for v in cfg["CNTK"].LR_PER_SAMPLE]
        bias_lr_schedule = learning_parameter_schedule_per_sample(bias_lr_per_sample)
        bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule,
                                    l2_regularization_weight=l2_reg_weight,
                                    unit_gain=False, use_mean_gradient=True)
        trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

        # Get minibatches of images and perform model training
        print("Training model for %s epochs." % epochs_to_train)
        log_number_of_parameters(loss)

        # Create the minibatch source
        if cfg.USE_PRECOMPUTED_PROPOSALS:
            proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE,
                                                          cfg.NUM_ROI_PROPOSALS)
        else:
            proposal_provider = ProposalProvider.fromconfig(cfg)

        od_minibatch_source = ObjectDetectionMinibatchSource(
            cfg["DATA"].TRAIN_MAP_FILE,
            cfg["DATA"].TRAIN_ROI_FILE,
            max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
            pad_width=cfg.IMAGE_WIDTH,
            pad_height=cfg.IMAGE_HEIGHT,
            pad_value=cfg["MODEL"].IMG_PAD_COLOR,
            randomize=True,
            use_flipping=cfg["TRAIN"].USE_FLIPPED,
            max_images=cfg["DATA"].NUM_TRAIN_IMAGES,
            num_classes=cfg["DATA"].NUM_CLASSES,
            proposal_provider=proposal_provider,
            provide_targets=True,
            proposal_iou_threshold=cfg.BBOX_THRESH,
            normalize_means=None if not cfg.BBOX_NORMALIZE_TARGETS else cfg.BBOX_NORMALIZE_MEANS,
            normalize_stds=None if not cfg.BBOX_NORMALIZE_TARGETS else cfg.BBOX_NORMALIZE_STDS)

        # define mapping from reader streams to network inputs
        input_map = {
            od_minibatch_source.image_si: image_input,
            od_minibatch_source.proposals_si: roi_proposals,
            od_minibatch_source.label_targets_si: label_targets,
            od_minibatch_source.bbox_targets_si: bbox_targets,
            od_minibatch_source.bbiw_si: bbox_inside_weights
        }

        progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
        for epoch in range(epochs_to_train):  # loop over epochs
            sample_count = 0
            while sample_count < cfg["DATA"].NUM_TRAIN_IMAGES:  # loop over minibatches in the epoch
                data = od_minibatch_source.next_minibatch(
                    min(cfg.MB_SIZE, cfg["DATA"].NUM_TRAIN_IMAGES - sample_count),
                    input_map=input_map)
                trainer.train_minibatch(data)                                    # update model with it
                sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
                progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
                if sample_count % 100 == 0:
                    print("Processed {} samples".format(sample_count))

            progress_printer.epoch_summary(with_metric=True)

        eval_model = create_fast_rcnn_eval_model(loss, image_input, roi_proposals, cfg)
        eval_model.save(cfg['MODEL_PATH'])
        return eval_model