def test_model_not_criterion_subset(): input_dim = 2 proj_dim = 11 model1_dim = 3 model2_dim = 4 x = input_variable((input_dim,)) core = Embedding(proj_dim) model1 = Dense(model1_dim)(sequence.last(core(x))) model1_label = input_variable((model1_dim,), dynamic_axes=[Axis.default_batch_axis()]) ce_model1 = cross_entropy_with_softmax(model1, model1_label) pe_model1 = classification_error(model1, model1_label) model2 = Dense(model2_dim)(core(x)) model2_label = input_variable((model2_dim,)) ce_model2 = cross_entropy_with_softmax(model2, model2_label) pe_model2 = classification_error(model2, model2_label) ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1 lr_schedule = learning_rate_schedule(0.003, UnitType.sample) trainer_multitask = Trainer(model1, (ce, pe_model1), sgd(ce.parameters, lr=lr_schedule)) x_data = np.asarray([[2., 1.], [1., 2.]], np.float32) model1_label_data = np.asarray([1., 0., 0.], np.float32) model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32) trainer_multitask.train_minibatch({x : [x_data], model1_label : [model1_label_data], model2_label : [model2_label_data]})
def test_model_not_criterion_subset(): input_dim = 2 proj_dim = 11 model1_dim = 3 model2_dim = 4 x = sequence.input((input_dim, )) core = Embedding(proj_dim) model1 = Dense(model1_dim)(sequence.last(core(x))) model1_label = input((model1_dim, )) ce_model1 = cross_entropy_with_softmax(model1, model1_label) pe_model1 = classification_error(model1, model1_label) model2 = Dense(model2_dim)(core(x)) model2_label = sequence.input((model2_dim, )) ce_model2 = cross_entropy_with_softmax(model2, model2_label) pe_model2 = classification_error(model2, model2_label) ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1 lr_schedule = learning_rate_schedule(0.003, UnitType.sample) trainer_multitask = Trainer(model1, (ce, pe_model1), sgd(ce.parameters, lr=lr_schedule)) x_data = np.asarray([[2., 1.], [1., 2.]], np.float32) model1_label_data = np.asarray([1., 0., 0.], np.float32) model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32) trainer_multitask.train_minibatch({ x: [x_data], model1_label: [model1_label_data], model2_label: [model2_label_data] })
def test_trainer(tmpdir, no_eval_function): in1 = input(shape=(1, )) labels = input(shape=(1, )) p = parameter(shape=(2, ), init=10) z = plus(in1, reduce_sum(p), name='z') ce = cross_entropy_with_softmax(z, labels) if no_eval_function: errs = None else: errs = classification_error(z, labels) momentum_time_constant = momentum_as_time_constant_schedule(1100) lr_per_sample = learning_rate_schedule(0.007, UnitType.sample) trainer = Trainer(z, (ce, errs), [ momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True) ]) in1_value = [[1], [2]] label_value = [[0], [1]] arguments = {in1: in1_value, labels: label_value} z_output = z.output updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output]) p = str(tmpdir / 'checkpoint.dat') trainer.save_checkpoint(p) trainer.restore_from_checkpoint(p) assert trainer.model.name == 'z' # Ensure that Swig is not leaking raw types assert isinstance(trainer.model, Function) assert trainer.model.__doc__ assert isinstance(trainer.parameter_learners[0], Learner)
def test_trainer(tmpdir, no_eval_function): in1 = C.input_variable(shape=(1,)) labels = C.input_variable(shape=(1,)) p = parameter(shape=(2,), init=10) z = plus(in1, reduce_sum(p), name='z') ce = cross_entropy_with_softmax(z, labels) if no_eval_function: errs = None else: errs = classification_error(z, labels) momentum_time_constant = C.momentum_as_time_constant_schedule(1100) lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size =1) trainer = C.Trainer(z, (ce, errs), [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)]) in1_value = [[1],[2]] label_value = [[0], [1]] arguments = {in1: in1_value, labels: label_value} z_output = z.output updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output]) p = str(tmpdir / 'checkpoint.dat') external_state = {"additional external state":math.pi, "nested dict":{"a":"b"}, "list":[1,2,3]} trainer.save_checkpoint(p, external_state) restored_state = trainer.restore_from_checkpoint(p) assert external_state == restored_state assert trainer.model.name == 'z' # Ensure that Swig is not leaking raw types assert isinstance(trainer.model, Function) assert trainer.model.__doc__ assert isinstance(trainer.parameter_learners[0], C.Learner)
def test_trainer(tmpdir, no_eval_function): in1 = input_variable(shape=(1,)) labels = input_variable(shape=(1,)) p = parameter(shape=(2,), init=10) z = plus(in1, reduce_sum(p), name='z') ce = cross_entropy_with_softmax(z, labels) if no_eval_function: errs = None else: errs = classification_error(z, labels) momentum_time_constant = momentum_as_time_constant_schedule(1100) lr_per_sample = learning_rate_schedule(0.007, UnitType.sample) trainer = Trainer(z, (ce, errs), [momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)]) in1_value = [[1],[2]] label_value = [[0], [1]] arguments = {in1: in1_value, labels: label_value} z_output = z.output updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output]) p = str(tmpdir / 'checkpoint.dat') trainer.save_checkpoint(p) trainer.restore_from_checkpoint(p) assert trainer.model.name == 'z' # Ensure that Swig is not leaking raw types assert isinstance(trainer.model, Function) assert trainer.model.__doc__ assert isinstance(trainer.parameter_learners[0], Learner)
def create_detection_losses(cls_score, label_targets, bbox_pred, rois, bbox_targets, bbox_inside_weights, cfg): # The losses are normalized by the batch size # classification loss p_cls_score = placeholder() p_label_targets = placeholder() cls_loss = cross_entropy_with_softmax(p_cls_score, p_label_targets, axis=1) cls_normalization_factor = 1.0 / cfg.NUM_ROI_PROPOSALS normalized_cls_loss = reduce_sum(cls_loss) * cls_normalization_factor reduced_cls_loss = cntk.as_block(normalized_cls_loss, [(p_cls_score, cls_score), (p_label_targets, label_targets)], 'CrossEntropyWithSoftmax', 'norm_cls_loss') # regression loss p_bbox_pred = placeholder() p_bbox_targets = placeholder() p_bbox_inside_weights = placeholder() bbox_loss = SmoothL1Loss(cfg.SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0) bbox_normalization_factor = 1.0 / cfg.NUM_ROI_PROPOSALS normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor reduced_bbox_loss = cntk.as_block(normalized_bbox_loss, [(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets), (p_bbox_inside_weights, bbox_inside_weights)], 'SmoothL1Loss', 'norm_bbox_loss') detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses") return detection_losses
def test_trainer_with_some_params_not_learned(): input_dim = 2 proj_dim = 2 x = input_variable(shape=(input_dim,)) W = parameter(shape=(input_dim, proj_dim), init=glorot_uniform()) B = parameter(shape=(proj_dim,), init=glorot_uniform()) t = times(x, W) z = t + B W_orig_value = W.value B_orig_value = B.value labels = input_variable(shape=(proj_dim,)) ce = cross_entropy_with_softmax(z, labels) pe = classification_error(z, labels) lr_per_sample = learning_rate_schedule(0.1, UnitType.sample) trainer = Trainer(z, (ce, pe), sgd([W], lr_per_sample)) x_value = [[1, 1],[2, 2]] label_value = [[0, 1], [1, 0]] arguments = {x: x_value, labels: label_value} num_iters = 3 for i in range(num_iters): trainer.train_minibatch(arguments) assert np.array_equal(B.value, B_orig_value) assert not np.array_equal(W.value, W_orig_value) W_orig_value = W.value trainer.test_minibatch(arguments)
def create_detection_losses(cls_score, label_targets, bbox_pred, rois, bbox_targets, bbox_inside_weights, cfg): # The losses are normalized by the batch size # classification loss p_cls_score = placeholder() p_label_targets = placeholder() cls_loss = cross_entropy_with_softmax(p_cls_score, p_label_targets, axis=1) cls_normalization_factor = 1.0 / cfg.NUM_ROI_PROPOSALS normalized_cls_loss = reduce_sum(cls_loss) * cls_normalization_factor reduced_cls_loss = cntk.as_block(normalized_cls_loss, [(p_cls_score, cls_score), (p_label_targets, label_targets)], 'CrossEntropyWithSoftmax', 'norm_cls_loss') # regression loss p_bbox_pred = placeholder() p_bbox_targets = placeholder() p_bbox_inside_weights = placeholder() bbox_loss = SmoothL1Loss(cfg.SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0) bbox_normalization_factor = 1.0 / cfg.NUM_ROI_PROPOSALS normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor reduced_bbox_loss = cntk.as_block(normalized_bbox_loss, [(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets), (p_bbox_inside_weights, bbox_inside_weights)], 'SmoothL1Loss', 'norm_bbox_loss') detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses") return detection_losses
def create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights): # classification loss cls_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1) p_cls_loss = placeholder() p_rois = placeholder() # The terms that are accounted for in the cls loss are those that correspond to an actual roi proposal --> do not count no-op (all-zero) rois roi_indicator = reduce_sum(p_rois, axis=1) cls_num_terms = reduce_sum(cntk.greater_equal(roi_indicator, 0.0)) cls_normalization_factor = 1.0 / cls_num_terms normalized_cls_loss = reduce_sum(p_cls_loss) * cls_normalization_factor reduced_cls_loss = cntk.as_block(normalized_cls_loss, [(p_cls_loss, cls_loss), (p_rois, rois)], 'Normalize', 'norm_cls_loss') # regression loss p_bbox_pred = placeholder() p_bbox_targets = placeholder() p_bbox_inside_weights = placeholder() bbox_loss = SmoothL1Loss(cfg["CNTK"].SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0) # The bbox loss is normalized by the batch size bbox_normalization_factor = 1.0 / cfg["TRAIN"].BATCH_SIZE normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor reduced_bbox_loss = cntk.as_block(normalized_bbox_loss, [(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets), (p_bbox_inside_weights, bbox_inside_weights)], 'SmoothL1Loss', 'norm_bbox_loss') detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses") return detection_losses
def create_bn_inception(): # Input variables denoting the features and label data feature_var = input_variable((NUM_CHANNELS, IMAGE_HEIGHT, IMAGE_WIDTH)) label_var = input_variable((NUM_CLASSES)) bn_time_const = 4096 z = bn_inception_cifar_model(feature_var, NUM_CLASSES, bn_time_const) # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) pe5 = classification_error(z, label_var, topN=5) log_number_of_parameters(z) print() return { 'feature': feature_var, 'label' : label_var, 'ce' : ce, 'pe' : pe, 'pe5' : pe5, 'output' : z }
def test_trainer_with_some_params_not_learned(): input_dim = 2 proj_dim = 2 x = input(shape=(input_dim, )) W = parameter(shape=(input_dim, proj_dim), init=glorot_uniform()) B = parameter(shape=(proj_dim, ), init=glorot_uniform()) t = times(x, W) z = t + B W_orig_value = W.value B_orig_value = B.value labels = input(shape=(proj_dim, )) ce = cross_entropy_with_softmax(z, labels) pe = classification_error(z, labels) lr_per_sample = learning_rate_schedule(0.1, UnitType.sample) trainer = Trainer(z, (ce, pe), sgd([W], lr_per_sample)) x_value = [[1, 1], [2, 2]] label_value = [[0, 1], [1, 0]] arguments = {x: x_value, labels: label_value} num_iters = 3 for i in range(num_iters): trainer.train_minibatch(arguments) assert np.array_equal(B.value, B_orig_value) assert not np.array_equal(W.value, W_orig_value) W_orig_value = W.value trainer.test_minibatch(arguments)
def create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights): # classification loss cls_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1) p_cls_loss = placeholder() p_rois = placeholder() # The terms that are accounted for in the cls loss are those that correspond to an actual roi proposal --> do not count no-op (all-zero) rois roi_indicator = reduce_sum(p_rois, axis=1) cls_num_terms = reduce_sum(cntk.greater_equal(roi_indicator, 0.0)) cls_normalization_factor = 1.0 / cls_num_terms normalized_cls_loss = reduce_sum(p_cls_loss) * cls_normalization_factor reduced_cls_loss = cntk.as_block(normalized_cls_loss, [(p_cls_loss, cls_loss), (p_rois, rois)], 'Normalize', 'norm_cls_loss') # regression loss p_bbox_pred = placeholder() p_bbox_targets = placeholder() p_bbox_inside_weights = placeholder() bbox_loss = SmoothL1Loss(cfg["CNTK"].SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0) # The bbox loss is normalized by the batch size bbox_normalization_factor = 1.0 / cfg["TRAIN"].BATCH_SIZE normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor reduced_bbox_loss = cntk.as_block(normalized_bbox_loss, [(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets), (p_bbox_inside_weights, bbox_inside_weights)], 'SmoothL1Loss', 'norm_bbox_loss') detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses") return detection_losses
def _setup_test_model(self, *args, **kwargs): inputs = placeholder(shape=(1, )) outputs = input_variable(shape=(1, ), dtype=np.float32) q = Dense(1, activation=None)(inputs) loss = cross_entropy_with_softmax(q, outputs) return {'inputs': inputs, 'outputs': outputs, 'f': q, 'loss': loss}
def entrenar(checkpoint, entrRuedas, entrOperaciones, input_dim, num_output_classes, testRuedas, testOperaciones): minibatch_size = 100; epocs=900; minibatchIteraciones = int(len(entrOperaciones) / minibatch_size); # Input variables denoting the features and label data feature = input((input_dim), np.float32) label = input((num_output_classes), np.float32) netout = crearRed(input_dim, num_output_classes, feature); ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) lr_per_minibatch=learning_rate_schedule(0.25, UnitType.minibatch) # Instantiate the trainer object to drive the model training learner = sgd(netout.parameters, lr=lr_per_minibatch) progress_printer = ProgressPrinter(log_to_file=checkpoint+".log", num_epochs=epocs); trainer = Trainer(netout, (ce, pe), learner, progress_printer) if os.path.isfile(checkpoint): trainer.restore_from_checkpoint(checkpoint); npentrRuedas = np.array(entrRuedas).astype(np.float32); npentrOperaciones = np.array(entrOperaciones).astype(np.float32); #iteramos una vez por cada "epoc" for i in range(0, epocs): p = np.random.permutation(len(entrRuedas)); npentrOperaciones = npentrOperaciones[p]; npentrRuedas = npentrRuedas[p]; #ahora partimos los datos en "minibatches" y entrenamos for j in range(0, minibatchIteraciones): features = npentrRuedas[j*minibatch_size:(j+1)*minibatch_size]; labels = npentrOperaciones[j*minibatch_size:(j+1)*minibatch_size]; trainer.train_minibatch({feature: features, label: labels}); trainer.summarize_training_progress() trainer.save_checkpoint(checkpoint); minibatchIteraciones = int(len(testOperaciones) / minibatch_size); avg_error = 0; for j in range(0, minibatchIteraciones): test_features = np.array(testRuedas[j*minibatch_size:(j+1)*minibatch_size]).astype(np.float32); test_labels = np.array(testOperaciones[j*minibatch_size:(j+1)*minibatch_size]).astype(np.float32); #test_features = np.array( entrRuedas[0:minibatch_size]).astype(np.float32); #test_labels = np.array(entrOperaciones[0:minibatch_size]).astype(np.float32); avg_error = avg_error + ( trainer.test_minibatch( {feature: test_features, label: test_labels}) / minibatchIteraciones) return avg_error
def create_trainer(use_sparse, device): a = C.sequence.input(shape=input_shape, is_sparse=use_sparse, name='input') w = C.parameter(init=w_init, device=dev) z = times(a, w) l = C.sequence.input(shape=label_shape, is_sparse=use_sparse, name='label') loss = cross_entropy_with_softmax(z, l, axis=-1) trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_rate_schedule(0.7, C.UnitType.sample))) return (a, l, w, trainer)
def create_trainer(use_sparse, device): a = C.sequence.input_variable(shape=input_shape, is_sparse=use_sparse, name='input') w = C.parameter(init=w_init, device=dev) z = times(a, w) l = C.sequence.input_variable(shape=label_shape, is_sparse=use_sparse, name='label') loss = cross_entropy_with_softmax(z, l, axis=-1) trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_rate_schedule(0.7, C.UnitType.sample))) return (a, l, w, trainer)
def train_lm(training_file, epochs, max_num_minibatches): # load the data and vocab data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file) # Model the source and target inputs to the model input_sequence, label_sequence = create_inputs(vocab_dim) # create the model model = create_model(vocab_dim) # and apply it to the input sequence z = model(input_sequence) # setup the criterions (loss and metric) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # Instantiate the trainer object to drive the model training lr_per_sample = learning_parameter_schedule_per_sample(0.001) momentum_schedule = momentum_schedule_per_sample(0.9990913221888589) clipping_threshold_per_sample = 5.0 gradient_clipping_with_truncation = True learner = momentum_sgd(z.parameters, lr_per_sample, momentum_schedule, gradient_clipping_threshold_per_sample=clipping_threshold_per_sample, gradient_clipping_with_truncation=gradient_clipping_with_truncation) progress_printer = ProgressPrinter(freq=100, tag='Training') trainer = Trainer(z, (ce, errs), learner, progress_printer) sample_freq = 1000 minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs) # print out some useful training information log_number_of_parameters(z) print ("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch)) print() for e in range(0, epochs): # Specify the mapping of input variables in the model to actual minibatch data to be trained with # If it's the start of the data, we specify that we are looking at a new sequence (True) mask = [True] for b in range(0, minibatches_per_epoch): # get the data features, labels = get_data(b, minibatch_size, data, char_to_ix, vocab_dim) arguments = ({input_sequence : features, label_sequence : labels}, mask) mask = [False] trainer.train_minibatch(arguments) global_minibatch = e*minibatches_per_epoch + b if global_minibatch % sample_freq == 0: print(sample(z, ix_to_char, vocab_dim, char_to_ix)) model_filename = "models/shakespeare_epoch%d.dnn" % (e+1) z.save(model_filename) print("Saved model to '%s'" % model_filename)
def train_sequence_classifier(debug_output=False): input_dim = 2000 cell_dim = 25 hidden_dim = 25 embedding_dim = 50 num_output_classes = 5 # Input variables denoting the features and label data features = sequence.input(shape=input_dim, is_sparse=True) label = input(num_output_classes) # Instantiate the sequence classification model classifier_output = LSTM_sequence_classifer_net(features, num_output_classes, embedding_dim, hidden_dim, cell_dim) ce = cross_entropy_with_softmax(classifier_output, label) pe = classification_error(classifier_output, label) rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) reader = create_reader(path, True, input_dim, num_output_classes) input_map = { features: reader.streams.features, label: reader.streams.labels } lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample) # Instantiate the trainer object to drive the model training trainer = Trainer(classifier_output, (ce, pe), sgd(classifier_output.parameters, lr=lr_per_sample)) # Get minibatches of sequences to train with and perform model training minibatch_size = 200 training_progress_output_freq = 10 if debug_output: training_progress_output_freq = training_progress_output_freq / 3 for i in range(251): mb = reader.next_minibatch(minibatch_size, input_map=input_map) trainer.train_minibatch(mb) print_training_progress(trainer, i, training_progress_output_freq) import copy evaluation_average = copy.copy( trainer.previous_minibatch_evaluation_average) loss_average = copy.copy(trainer.previous_minibatch_loss_average) return evaluation_average, loss_average
def train_model(debug_output=False): # Create the minibatch source minibatch_source = create_reader(map_file) # Input variables denoting features, rois and label data image_input = input_variable((num_channels, image_height, image_width)) label_input = input_variable((num_classes)) # define mapping from reader streams to network inputs input_map = { image_input: minibatch_source.streams.features, label_input: minibatch_source.streams.labels } # Instantiate the Fast R-CNN prediction model and loss function model = modify_model(image_input, num_classes) ce = cross_entropy_with_softmax(model, label_input) pe = classification_error(model, label_input) # Set learning parameters l2_reg_weight = 0.0005 lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001] momentum_time_constant = 10 lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample) mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant) # Instantiate the trainer object progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)] learner = momentum_sgd(model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight) trainer = Trainer(model, (ce, pe), learner, progress_writers) # Get minibatches of images and perform model training print("Training image classifier for %s epochs." % max_epochs) log_number_of_parameters(model) for epoch in range(max_epochs): sample_count = 0 while sample_count < epoch_size: data = minibatch_source.next_minibatch(min( mb_size, epoch_size - sample_count), input_map=input_map) trainer.train_minibatch(data) sample_count += trainer.previous_minibatch_sample_count trainer.summarize_training_progress() model.save( os.path.join(output_model_folder, 'withcrops_{}.dnn'.format(epoch + 1))) return
def create_sample_model(device, writer=None, lr_per_sample=C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])): in1 = sequence.input_variable(shape=(input_dim,)) labels = sequence.input_variable(shape=(input_dim,)) p = parameter(shape=(input_dim,), init=10, device=device) z = plus(in1, reduce_sum(p), name='z') ce = cross_entropy_with_softmax(z, labels) errs = classification_error(z, labels) learner = C.sgd(z.parameters, lr_per_sample) trainer = C.Trainer(z, (ce, errs), [learner], writer) return (trainer, in1, labels)
def _setup_test_model(self, *args, **kwargs): inputs = placeholder(shape=(1,)) outputs = input_variable(shape=(1,), dtype=np.float32) q = Dense(1, activation=None)(inputs) loss = cross_entropy_with_softmax(q, outputs) return { 'inputs': inputs, 'outputs': outputs, 'f': q, 'loss': loss }
def create_sample_model(device, writer=None): in1 = sequence.input(shape=(input_dim, )) labels = sequence.input(shape=(input_dim, )) p = parameter(shape=(input_dim, ), init=10, device=device) z = plus(in1, reduce_sum(p), name='z') ce = cross_entropy_with_softmax(z, labels) errs = classification_error(z, labels) lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample) learner = sgd(z.parameters, lr_per_sample) trainer = Trainer(z, (ce, errs), [learner], writer) return (trainer, in1, labels)
def test_model_one_output_of_multi_output_function(): input_dim = 2 proj_dim = 11 x = C.input_variable((input_dim,)) x_placeholder = C.placeholder() w = parameter((input_dim, proj_dim)) b = parameter((proj_dim,)) proj = times(x_placeholder, w) proj_plus_bias = proj + b combined_model = as_block(C.combine([proj, proj_plus_bias]), [(x_placeholder, x)], 'dense_op') labels = C.input_variable((proj_dim,)) lr_schedule = C.learning_parameter_schedule(0.003, minibatch_size =1) ce = cross_entropy_with_softmax(combined_model.outputs[0], labels) pe = classification_error(combined_model.outputs[0], labels) trainer_multitask = C.Trainer(combined_model.outputs[0], (ce, pe), C.sgd(ce.parameters, lr=lr_schedule))
def test_model_one_output_of_multi_output_function(): input_dim = 2 proj_dim = 11 x = input((input_dim,)) x_placeholder = placeholder() w = parameter((input_dim, proj_dim)) b = parameter((proj_dim,)) proj = times(x_placeholder, w) proj_plus_bias = proj + b combined_model = as_block(combine([proj, proj_plus_bias]), [(x_placeholder, x)], 'dense_op') labels = input((proj_dim,)) lr_schedule = learning_rate_schedule(0.003, UnitType.sample) ce = cross_entropy_with_softmax(combined_model.outputs[0], labels) pe = classification_error(combined_model.outputs[0], labels) trainer_multitask = Trainer(combined_model.outputs[0], (ce, pe), sgd(ce.parameters, lr=lr_schedule))
def test_output_to_retain(): in1 = C.input_variable(shape=(1,)) labels = C.input_variable(shape=(1,)) p = parameter(shape=(2,), init=10) z = plus(in1, reduce_sum(p), name='z') ce = cross_entropy_with_softmax(z, labels) errs = classification_error(z, labels) momentum_time_constant = C.momentum_as_time_constant_schedule(1100) lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size =1) trainer = C.Trainer(z, (ce, errs), [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)]) in1_value = [[1], [2]] label_value = [[0], [1]] arguments = {in1: in1_value, labels: label_value} z_output = z.output updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output]) assert np.allclose(var_map[z_output], np.asarray(in1_value)+20)
def test_model_one_output_of_multi_output_function(): input_dim = 2 proj_dim = 11 x = input_variable((input_dim,)) x_placeholder = placeholder_variable() w = parameter((input_dim, proj_dim)) b = parameter((proj_dim,)) proj = times(x_placeholder, w) proj_plus_bias = proj + b combined_model = as_block(combine([proj, proj_plus_bias]), [(x_placeholder, x)], 'dense_op') labels = input_variable((proj_dim,)) lr_schedule = learning_rate_schedule(0.003, UnitType.sample) ce = cross_entropy_with_softmax(combined_model.outputs[0], labels) pe = classification_error(combined_model.outputs[0], labels) trainer_multitask = Trainer(combined_model.outputs[0], (ce, pe), sgd(ce.parameters, lr=lr_schedule))
def create_trainer(use_sparse, device): a = C.sequence.input(shape=input_shape, is_sparse=use_sparse, name='input') w_i = C.parameter(init=w_init_i, device=dev) a_projection = times(a, w_i) p_o = C.placeholder() h = C.sequence.past_value(p_o) w_h = C.parameter(init=w_init_h, device=dev) h_projection = times(h, w_h) z = a_projection + h_projection z = z.replace_placeholder(z) z = reshape(z, label_shape) l = C.sequence.input(shape=label_shape, is_sparse=use_sparse, name='label') loss = cross_entropy_with_softmax(z, l, axis=-1) trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_rate_schedule(0.7, C.UnitType.sample))) return (a, l, w_i, w_h, trainer)
def test_output_to_retain(): in1 = input_variable(shape=(1,)) labels = input_variable(shape=(1,)) p = parameter(shape=(2,), init=10) z = plus(in1, reduce_sum(p), name='z') ce = cross_entropy_with_softmax(z, labels) errs = classification_error(z, labels) momentum_time_constant = momentum_as_time_constant_schedule(1100) lr_per_sample = learning_rate_schedule(0.007, UnitType.sample) trainer = Trainer(z, (ce, errs), [momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)]) in1_value = [[[1]], [[2]]] label_value = [[0], [1]] arguments = {in1: in1_value, labels: label_value} z_output = z.output updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output]) assert np.allclose(var_map[z_output], np.asarray(in1_value)+20)
def create_trainer(use_sparse, device): a = C.sequence.input_variable(shape=input_shape, is_sparse=use_sparse, name='input') w_i = C.parameter(init=w_init_i, device=dev) a_projection = times(a, w_i) p_o = C.placeholder() h = C.sequence.past_value(p_o) w_h = C.parameter(init=w_init_h, device=dev) h_projection = times(h, w_h) z = a_projection + h_projection z = z.replace_placeholder(z) z = reshape(z, label_shape) l = C.sequence.input_variable(shape=label_shape, is_sparse=use_sparse, name='label') loss = cross_entropy_with_softmax(z, l, axis=-1) trainer = C.Trainer(z, (loss, None), C.sgd(z.parameters, lr=C.learning_rate_schedule(0.7, C.UnitType.sample))) return (a, l, w_i, w_h, trainer)
def train_model(base_model_file, feature_node_name, last_hidden_node_name, image_width, image_height, num_channels, num_classes, train_map_file, num_epochs, max_images=-1, freeze=False): epoch_size = sum(1 for line in open(train_map_file)) if max_images > 0: epoch_size = min(epoch_size, max_images) # Create the minibatch source and input variables minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes) image_input = C.input_variable((num_channels, image_height, image_width)) label_input = C.input_variable(num_classes) # Define mapping from reader streams to network inputs input_map = { image_input: minibatch_source[features_stream_name], label_input: minibatch_source[label_stream_name] } # Instantiate the transfer learning model and loss function tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze) ce = cross_entropy_with_softmax(tl_model, label_input) pe = classification_error(tl_model, label_input) # Instantiate the trainer object lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch) mm_schedule = momentum_schedule(momentum_per_mb) learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight) progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs) trainer = Trainer(tl_model, (ce, pe), learner, progress_printer) # Get minibatches of images and perform model training print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size)) log_number_of_parameters(tl_model) for epoch in range(num_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map) trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far if sample_count % (100 * mb_size) == 0: print ("Processed {0} samples".format(sample_count)) trainer.summarize_training_progress() return tl_model
def train_model(base_model_file, feature_node_name, last_hidden_node_name, image_width, image_height, num_channels, num_classes, train_map_file, num_epochs, max_images=-1, freeze=False): epoch_size = sum(1 for line in open(train_map_file)) if max_images > 0: epoch_size = min(epoch_size, max_images) # Create the minibatch source and input variables minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes) image_input = C.input_variable((num_channels, image_height, image_width)) label_input = C.input_variable(num_classes) # Define mapping from reader streams to network inputs input_map = { image_input: minibatch_source[features_stream_name], label_input: minibatch_source[label_stream_name] } # Instantiate the transfer learning model and loss function tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze) ce = cross_entropy_with_softmax(tl_model, label_input) pe = classification_error(tl_model, label_input) # Instantiate the trainer object lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch) mm_schedule = momentum_schedule(momentum_per_mb) learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight) progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs) trainer = Trainer(tl_model, (ce, pe), learner, progress_printer) # Get minibatches of images and perform model training print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size)) log_number_of_parameters(tl_model) for epoch in range(num_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map) trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far if sample_count % (100 * mb_size) == 0: print("Processed {0} samples".format(sample_count)) trainer.summarize_training_progress() return tl_model
def ffnet(): input_dim = 2 num_output_classes = 2 num_hidden_layers = 2 hidden_layers_dim = 50 # Input variables denoting the features and label data feature = input_variable((input_dim), np.float32) label = input_variable((num_output_classes), np.float32) netout = Sequential([ For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=sigmoid)), Dense(num_output_classes) ])(feature) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) lr_per_minibatch = learning_parameter_schedule(0.5) # Instantiate the trainer object to drive the model training learner = sgd(netout.parameters, lr=lr_per_minibatch) progress_printer = ProgressPrinter(128) trainer = Trainer(netout, (ce, pe), learner, progress_printer) # Get minibatches of training data and perform model training minibatch_size = 25 for i in range(1024): features, labels = generate_random_data(minibatch_size, input_dim, num_output_classes) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with trainer.train_minibatch({feature: features, label: labels}) trainer.summarize_training_progress() test_features, test_labels = generate_random_data(minibatch_size, input_dim, num_output_classes) avg_error = trainer.test_minibatch({ feature: test_features, label: test_labels }) return avg_error
def ffnet(): input_dim = 2 num_output_classes = 2 num_hidden_layers = 2 hidden_layers_dim = 50 # Input variables denoting the features and label data input = input_variable((input_dim), np.float32) label = input_variable((num_output_classes), np.float32) # Instantiate the feedforward classification model netout = fully_connected_classifier_net(input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch) # Instantiate the trainer object to drive the model training learner = sgd(netout.parameters, lr=lr_per_minibatch) progress_printer = ProgressPrinter(128) trainer = Trainer(netout, (ce, pe), learner, progress_printer) # Get minibatches of training data and perform model training minibatch_size = 25 for i in range(1024): features, labels = generate_random_data(minibatch_size, input_dim, num_output_classes) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with trainer.train_minibatch({input: features, label: labels}) trainer.summarize_training_progress() test_features, test_labels = generate_random_data(minibatch_size, input_dim, num_output_classes) avg_error = trainer.test_minibatch({ input: test_features, label: test_labels }) return avg_error
def run_distributed_training(tmpdir, create_func): in1 = sequence.input_variable(shape=1) labels = sequence.input_variable(shape=1) p = parameter(shape=2, init=10) z = plus(in1, reduce_sum(p), name='z') ce = cross_entropy_with_softmax(z, labels) errs = classification_error(z, labels) momentum_time_constant = C.momentum_as_time_constant_schedule(1100) lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample) dist_learner = create_func( C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)) communicator = dist_learner.communicator() workers = communicator.workers() current_worker = communicator.current_worker() found_rank = False for wk in workers: if current_worker.global_rank == wk.global_rank: found_rank = True assert found_rank trainer = C.Trainer(z, (ce, errs), [dist_learner]) in1_value = [[1], [2]] label_value = [[0], [1]] arguments = {in1: in1_value, labels: label_value} z_output = z.output updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output]) p = str(tmpdir / 'checkpoint.dat') trainer.save_checkpoint(p) trainer.restore_from_checkpoint(p) communicator.barrier() assert trainer.model.name == 'z' # Ensure that Swig is not leaking raw types assert isinstance(trainer.model, Function) assert trainer.model.__doc__
def create_network(num_convolution_layers): """ Create network """ # Input variables denoting the features and label data input_var = cntk.input_variable( (_NUM_CHANNELS, _IMAGE_HEIGHT, _IMAGE_WIDTH)) label_var = cntk.input_variable((_NUM_CLASSES)) # create model, and configure learning parameters # Instantiate the feedforward classification model input_removemean = minus(input_var, constant(128)) scaled_input = element_times(constant(0.00390625), input_removemean) print('Creating NN model') with layers.default_options(activation=relu, pad=True): model = layers.Sequential([ layers.For( range(num_convolution_layers), lambda: [ layers.Convolution2D((3, 3), 64), layers.Convolution2D((3, 3), 64), layers.MaxPooling((3, 3), (2, 2)) ]), layers.For( range(2), lambda i: [layers.Dense([256, 128][i]), layers.Dropout(0.5)]), layers.Dense(_NUM_CLASSES, activation=None) ])(scaled_input) # loss and metric ce = cross_entropy_with_softmax(model, label_var) pe = classification_error(model, label_var) return { 'name': 'convnet', 'feature': input_var, 'label': label_var, 'ce': ce, 'pe': pe, 'output': model }
def ffnet(data, labels): input_dim = 800 num_output_classes = 3 num_hidden_layers = 2 hidden_layers_dim = 50 # Input variables denoting the features and label data feature = input((input_dim), np.float32) label = input((num_output_classes), np.float32) netout = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=sigmoid)), Dense(num_output_classes)])(feature) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) lr_per_minibatch=learning_rate_schedule(0.5, UnitType.minibatch) # Instantiate the trainer object to drive the model training learner = sgd(netout.parameters, lr=lr_per_minibatch) progress_printer = ProgressPrinter(128) trainer = Trainer(netout, (ce, pe), learner, progress_printer) # Get minibatches of training data and perform model training minibatch_size = 25 features, labels = generate_stock_data(minibatch_size); for i in range(1024): # features, labels = generate_random_data( # minibatch_size, input_dim, num_output_classes) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with trainer.train_minibatch({feature: features, label: labels}) trainer.summarize_training_progress() test_features, test_labels = generate_random_data( minibatch_size, input_dim, num_output_classes) avg_error = trainer.test_minibatch( {feature: test_features, label: test_labels}) return avg_error
def run_distributed_training(tmpdir, create_func): in1 = sequence.input_variable(shape=1) labels = sequence.input_variable(shape=1) p = parameter(shape=2, init=10) z = plus(in1, reduce_sum(p), name='z') ce = cross_entropy_with_softmax(z, labels) errs = classification_error(z, labels) momentum_time_constant = C.momentum_as_time_constant_schedule(1100) lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample) dist_learner = create_func(C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)) communicator = dist_learner.communicator() workers = communicator.workers() current_worker = communicator.current_worker() found_rank = False for wk in workers: if current_worker.global_rank == wk.global_rank: found_rank = True assert found_rank trainer = C.Trainer(z, (ce, errs), [ dist_learner ]) in1_value = [[1],[2]] label_value = [[0], [1]] arguments = {in1: in1_value, labels: label_value} z_output = z.output updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output]) p = str(tmpdir / 'checkpoint.dat') trainer.save_checkpoint(p) trainer.restore_from_checkpoint(p) communicator.barrier() assert trainer.model.name == 'z' # Ensure that Swig is not leaking raw types assert isinstance(trainer.model, Function) assert trainer.model.__doc__
def cargarRedDesdeArchivo(archivo): input_dim = 800; num_output_classes = 3; feature = input((input_dim), np.float32); label = input((num_output_classes), np.float32) netout = crearRed(input_dim, 3, feature); ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) lr_per_minibatch=learning_rate_schedule(0.5, UnitType.minibatch) # Instantiate the trainer object to drive the model training learner = sgd(netout.parameters, lr=lr_per_minibatch) progress_printer = ProgressPrinter(1) trainer = Trainer(netout, (ce, pe), learner, progress_printer) trainer.restore_from_checkpoint(archivo); return netout;
def simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 2 hidden_layers_dim = 200 # Input variables denoting the features and label data feature = C.input_variable(input_dim) label = C.input_variable(num_output_classes) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), feature) # z = Sequential([ # Dense(hidden_layers_dim, activation=relu), # Dense(hidden_layers_dim, activation=relu), # Dense(num_output_classes)])(scaled_input) with default_options(activation=relu, init=C.glorot_uniform()): z = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim)), Dense(num_output_classes, activation=None)])(scaled_input) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) # setup the data path = abs_path + "\Train-28x28_cntk_text.txt" reader_train = MinibatchSource(CTFDeserializer(path, StreamDefs( features=StreamDef(field='features', shape=input_dim), labels=StreamDef(field='labels', shape=num_output_classes)))) input_map = { feature: reader_train.streams.features, label: reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. progress_writers = [ProgressPrinter( tag='Training', num_epochs=num_sweeps_to_train_with)] # Instantiate the trainer object to drive the model training lr = learning_rate_schedule(1, UnitType.sample) trainer = Trainer(z, (ce, pe), [adadelta(z.parameters, lr)], progress_writers) training_session( trainer=trainer, mb_source=reader_train, mb_size=minibatch_size, model_inputs_to_streams=input_map, max_samples=num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep ).train() # Load test data path = abs_path + "\Test-28x28_cntk_text.txt" reader_test = MinibatchSource(CTFDeserializer(path, StreamDefs( features=StreamDef(field='features', shape=input_dim), labels=StreamDef(field='labels', shape=num_output_classes)))) input_map = { feature: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def simple_mnist(tensorboard_logdir=None): input_dim = 784 num_output_classes = 10 num_hidden_layers = 2 hidden_layers_dim = 200 # Input variables denoting the features and label data feature = C.input_variable(input_dim, np.float32) label = C.input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), feature) z = Sequential([ For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=relu)), Dense(num_output_classes) ])(scaled_input) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = os.path.dirname(os.path.abspath(__file__)) path = os.path.join(data_dir, 'Train-28x28_cntk_text.txt') reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { feature: reader_train.streams.features, label: reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. # training_progress_output_freq = 100 progress_writers = [ ProgressPrinter( # freq=training_progress_output_freq, tag='Training', num_epochs=num_sweeps_to_train_with) ] if tensorboard_logdir is not None: progress_writers.append( TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)) # Instantiate the trainer object to drive the model training lr = 0.001 trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr), progress_writers) training_session(trainer=trainer, mb_source=reader_train, mb_size=minibatch_size, model_inputs_to_streams=input_map, max_samples=num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep).train() # Load test data path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt")) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { feature: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model C.debugging.start_profiler() C.debugging.enable_profiler() C.debugging.set_node_timing(True) test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error C.debugging.stop_profiler() trainer.print_node_timing() # Average of evaluation errors of all test minibatches return test_result * 100 / num_minibatches_to_test
def simple_mnist(tensorboard_logdir=None): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data feature = C.input_variable(input_dim, np.float32) label = C.input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), feature) z = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=relu)), Dense(num_output_classes)])(scaled_input) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST") path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt")) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { feature : reader_train.streams.features, label : reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. #training_progress_output_freq = 100 progress_writers = [ProgressPrinter( #freq=training_progress_output_freq, tag='Training', num_epochs=num_sweeps_to_train_with)] if tensorboard_logdir is not None: progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)) # Instantiate the trainer object to drive the model training lr = learning_parameter_schedule_per_sample(1) trainer = Trainer(z, (ce, pe), adadelta(z.parameters, lr), progress_writers) training_session( trainer=trainer, mb_source = reader_train, mb_size = minibatch_size, model_inputs_to_streams = input_map, max_samples = num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep ).train() # Load test data path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt")) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { feature : reader_test.streams.features, label : reader_test.streams.labels } # Test data for trained model C.debugging.start_profiler() C.debugging.enable_profiler() C.debugging.set_node_timing(True) #C.cntk_py.disable_cpueval_optimization() # uncomment this to check CPU eval perf without optimization test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error C.debugging.stop_profiler() trainer.print_node_timing() # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def simple_mnist(tensorboard_logdir=None): input_dim = 19 num_output_classes = 2 num_hidden_layers = 2 hidden_layers_dim = 1024 # Input variables denoting the features and label data feature = C.input_variable(input_dim, np.float32) label = C.input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model # scaled_input = element_times(constant(0.00390625), feature) z = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=relu)), Dense(num_output_classes)])(feature) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = r"." path = os.path.normpath(os.path.join(data_dir, "train.ctf")) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { feature : reader_train.streams.features, label : reader_train.streams.labels } # Training config minibatch_size = 512 num_samples_per_sweep = 1825000 num_sweeps_to_train_with = 100 # Instantiate progress writers. progress_writers = [ProgressPrinter( tag='Training', num_epochs=num_sweeps_to_train_with)] if tensorboard_logdir is not None: tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z) progress_writers.append(tensorboard_writer) # Instantiate the trainer object to drive the model training lr = learning_parameter_schedule_per_sample(0.001) learner = create_learner(model=z) trainer = Trainer(z, (ce, pe), learner, progress_writers) num_minibatches_to_train = int(num_samples_per_sweep / minibatch_size * num_sweeps_to_train_with) model_dir = "model" for i in range(num_minibatches_to_train): mb = reader_train.next_minibatch(minibatch_size, input_map=input_map) trainer.train_minibatch(mb) freq = int(num_samples_per_sweep / minibatch_size) if i > 0 and i % freq == 0: timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f") current_trainer_cp = os.path.join(model_dir, timestamp + "_epoch_" + str(freq) + ".trainer") trainer.save_checkpoint(current_trainer_cp) train_error = get_error_rate(os.path.join(data_dir, "train_subset.ctf"), input_map, input_dim, num_output_classes, trainer) valid_error = get_error_rate(os.path.join(data_dir, "validation.ctf"), input_map, input_dim, num_output_classes, trainer) if train_error > 0: tensorboard_writer.write_value("train_error", train_error, i) if valid_error > 0: tensorboard_writer.write_value("valid_error", valid_error, i) feat_path = os.path.normpath(os.path.join(data_dir, "test.ctf")) return get_error_rate(feat_path, input_map, input_dim, num_output_classes, trainer)
def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True): ''' Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper: Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun: "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" Outputs object detection proposals by applying estimated bounding-box transformations to a set of regular boxes (called "anchors"). Args: conv_out: The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image. im_info: A CNTK variable or constant containing (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000 cfg: The configuration dictionary add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses Returns: rpn_rois - the proposed ROIs rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness) ''' # RPN network # init = 'normal', initValueScale = 0.01, initBias = 0.1 num_channels = cfg["MODEL"].RPN_NUM_CHANNELS rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1, init=normal(scale=0.01), init_bias=0.0)(conv_out) rpn_cls_score = Convolution( (1, 1), 18, activation=None, name="rpn_cls_score", init=normal(scale=0.01), init_bias=0.0)(rpn_conv_3x3) # 2(bg/fg) * 9(anchors) rpn_bbox_pred = Convolution( (1, 1), 36, activation=None, name="rpn_bbox_pred", init=normal(scale=0.01), init_bias=0.0)(rpn_conv_3x3) # 4(coords) * 9(anchors) # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W) num_predictions = int(rpn_cls_score.shape[0] / 2) rpn_cls_score_rshp = reshape( rpn_cls_score, (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]), name="rpn_cls_score_rshp") p_rpn_cls_score_rshp = cntk.placeholder() rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0) rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'Softmax', 'rpn_cls_prob') rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape") # proposal layer rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg) rpn_losses = None if (add_loss_functions): # RPN targets # Comment: rpn_cls_score is only passed vvv to get width and height of the conv feature map ... proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}". \ format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES])) atl = user_function( AnchorTargetLayer( rpn_cls_score, scaled_gt_boxes, im_info, rpn_batch_size=cfg["TRAIN"].RPN_BATCHSIZE, rpn_fg_fraction=cfg["TRAIN"].RPN_FG_FRACTION, clobber_positives=cfg["TRAIN"].RPN_CLOBBER_POSITIVES, positive_overlap=cfg["TRAIN"].RPN_POSITIVE_OVERLAP, negative_overlap=cfg["TRAIN"].RPN_NEGATIVE_OVERLAP, param_str=proposal_layer_params)) rpn_labels = atl.outputs[0] rpn_bbox_targets = atl.outputs[1] rpn_bbox_inside_weights = atl.outputs[2] # classification loss p_rpn_labels = cntk.placeholder() p_rpn_cls_score_rshp = cntk.placeholder() keeps = cntk.greater_equal(p_rpn_labels, 0.0) fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets") bg_labels = minus(1, fg_labels, name="bg_targets") rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0) rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0) rpn_loss_cls = element_times(rpn_ce, keeps) # The terms that are accounted for in the cls loss are those that have a label >= 0 cls_num_terms = reduce_sum(keeps) cls_normalization_factor = 1.0 / cls_num_terms normalized_rpn_cls_loss = reduce_sum( rpn_loss_cls) * cls_normalization_factor reduced_rpn_loss_cls = cntk.as_block( normalized_rpn_cls_loss, [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'CE_with_ignore', 'norm_rpn_cls_loss') # regression loss p_rpn_bbox_pred = cntk.placeholder() p_rpn_bbox_targets = cntk.placeholder() p_rpn_bbox_inside_weights = cntk.placeholder() rpn_loss_bbox = SmoothL1Loss(cfg.SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets, p_rpn_bbox_inside_weights, 1.0) # The bbox loss is normalized by the rpn batch size bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE normalized_rpn_bbox_loss = reduce_sum( rpn_loss_bbox) * bbox_normalization_factor reduced_rpn_loss_bbox = cntk.as_block( normalized_rpn_bbox_loss, [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets), (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)], 'SmoothL1Loss', 'norm_rpn_bbox_loss') rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses") return rpn_rois, rpn_losses
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True, proposal_layer_param_string=None, conv_bias_init=0.0): ''' Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper: Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun: "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" Outputs object detection proposals by applying estimated bounding-box transformations to a set of regular boxes (called "anchors"). Args: conv_out: The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image. im_info: A CNTK variable or constant containing (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000 add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer. Returns: rpn_rois - the proposed ROIs rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness) ''' # RPN network # init = 'normal', initValueScale = 0.01, initBias = 0.1 num_channels = cfg["CNTK"].RPN_NUM_CHANNELS rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1, init = normal(scale=0.01), init_bias=conv_bias_init)(conv_out) rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score", init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3) # 2(bg/fg) * 9(anchors) rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred", init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3) # 4(coords) * 9(anchors) # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W) num_predictions = int(rpn_cls_score.shape[0] / 2) rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]), name="rpn_cls_score_rshp") p_rpn_cls_score_rshp = cntk.placeholder() rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0) rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'Softmax', 'rpn_cls_prob') rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape") # proposal layer rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string)) rpn_rois = alias(rpn_rois_raw, name='rpn_rois') rpn_losses = None if(add_loss_functions): # RPN targets # Comment: rpn_cls_score is only passed vvv to get width and height of the conv feature map ... atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string)) rpn_labels = atl.outputs[0] rpn_bbox_targets = atl.outputs[1] rpn_bbox_inside_weights = atl.outputs[2] # classification loss p_rpn_labels = cntk.placeholder() p_rpn_cls_score_rshp = cntk.placeholder() keeps = cntk.greater_equal(p_rpn_labels, 0.0) fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets") bg_labels = minus(1, fg_labels, name="bg_targets") rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0) rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0) rpn_loss_cls = element_times(rpn_ce, keeps) # The terms that are accounted for in the cls loss are those that have a label >= 0 cls_num_terms = reduce_sum(keeps) cls_normalization_factor = 1.0 / cls_num_terms normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss, [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'CE_with_ignore', 'norm_rpn_cls_loss') # regression loss p_rpn_bbox_pred = cntk.placeholder() p_rpn_bbox_targets = cntk.placeholder() p_rpn_bbox_inside_weights = cntk.placeholder() rpn_loss_bbox = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets, p_rpn_bbox_inside_weights, 1.0) # The bbox loss is normalized by the rpn batch size bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss, [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets), (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)], 'SmoothL1Loss', 'norm_rpn_bbox_loss') rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses") return rpn_rois, rpn_losses
def train_lm(training_file, epochs, max_num_minibatches): # load the data and vocab data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab( training_file) # Model the source and target inputs to the model input_sequence, label_sequence = create_inputs(vocab_dim) # create the model model = create_model(vocab_dim) # and apply it to the input sequence z = model(input_sequence) # setup the criterions (loss and metric) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # Instantiate the trainer object to drive the model training lr_per_sample = learning_parameter_schedule_per_sample(0.001) momentum_schedule = momentum_schedule_per_sample(0.9990913221888589) clipping_threshold_per_sample = 5.0 gradient_clipping_with_truncation = True learner = momentum_sgd( z.parameters, lr_per_sample, momentum_schedule, gradient_clipping_threshold_per_sample=clipping_threshold_per_sample, gradient_clipping_with_truncation=gradient_clipping_with_truncation) progress_printer = ProgressPrinter(freq=100, tag='Training') trainer = Trainer(z, (ce, errs), learner, progress_printer) sample_freq = 1000 minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs) # print out some useful training information log_number_of_parameters(z) print("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch)) print() for e in range(0, epochs): # Specify the mapping of input variables in the model to actual minibatch data to be trained with # If it's the start of the data, we specify that we are looking at a new sequence (True) mask = [True] for b in range(0, minibatches_per_epoch): # get the data features, labels = get_data(b, minibatch_size, data, char_to_ix, vocab_dim) arguments = ({ input_sequence: features, label_sequence: labels }, mask) mask = [False] trainer.train_minibatch(arguments) global_minibatch = e * minibatches_per_epoch + b if global_minibatch % sample_freq == 0: print(sample(z, ix_to_char, vocab_dim, char_to_ix)) model_filename = "models/shakespeare_epoch%d.dnn" % (e + 1) z.save(model_filename) print("Saved model to '%s'" % model_filename)
Dense(500, activation=relu), Dense(300, activation=relu), Dense(K, activation=None), ]) # define the inputs and labels inputs = C.input_variable(D, np.float32, name='inputs') labels = C.input_variable(K, np.float32, name='labels') # get the output logits = model(inputs) # define loss / metrics # like Tensorflow the softmax is done # internally (if needed), so all we need are the logits ce = cross_entropy_with_softmax(logits, labels) pe = classification_error(logits, labels) # training config batch_size = 32 epochs = 15 n_batches = len(Xtrain) // batch_size # do the training # specify the training algorithm trainer = Trainer(logits, (ce, pe), adam(logits.parameters, lr=1e-2, momentum=0.9)) # helper function
) # define the inputs and labels inputs = C.input_variable(D, np.float32, name='inputs') labels = C.input_variable(K, np.float32, name='labels') # get the output logits = model(inputs) # define loss / metrics # like Tensorflow the softmax is done # internally (if needed), so all we need are the logits ce = cross_entropy_with_softmax(logits, labels) pe = classification_error(logits, labels) # training config batch_size = 32 epochs = 15 n_batches = len(Xtrain) // batch_size # do the training # specify the training algorithm trainer = Trainer(logits, (ce, pe), adam(logits.parameters, lr=1e-2, momentum=0.9))
def criterion(query, labels): z = model(query) ce = cross_entropy_with_softmax(z, labels) errs = classification_error (z, labels) return (ce, errs)
def __train_cntk(self, path_to_folder: str, model_definition, epochs: int, output_model_path: str, classes, minibatch_size: int): import cntk from cntk.learners import learning_parameter_schedule from cntk.ops import input_variable from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef, MinibatchData, UserDeserializer import cntk.io.transforms as xforms from cntk.layers import default_options, Dense, Sequential, Activation, Embedding, Convolution2D, MaxPooling, Stabilizer, Convolution, Dropout, BatchNormalization from cntk.ops.functions import CloneMethod from cntk.logging import ProgressPrinter from cntk.losses import cross_entropy_with_softmax from cntk import classification_error, softmax, relu, ModelFormat, element_times, momentum_schedule, momentum_sgd import pandas as pd path_to_folder = path_to_folder.rstrip('/') map_file_train = path_to_folder + "/train_map.txt" map_file_test = path_to_folder + "/test_map.txt" classes_set = set() num_train = 0 num_test = 0 num_channels = 3 class TrackDataset(UserDeserializer): def __init__(self, map_file, streams, chunksize=100): super(TrackDataset, self).__init__() self._batch_size = chunksize self.dataframes = pd.read_csv(map_file, sep='\t', dtype=str, header=None, names=["features", "labels"]) self._streams = [ cntk.io.StreamInformation(s['name'], i, 'dense', np.float32, s['shape']) for i, s in enumerate(streams) ] self._num_chunks = int( math.ceil(len(self.dataframes) / chunksize)) def _scale_image(self, image, width=224, height=168): try: return image.resize((width, height), Image.LINEAR) except: raise Exception('scale_image error') def stream_infos(self): return self._streams def num_chunks(self): return self._num_chunks def get_chunk(self, chunk_id): images = [] labels = [] maximum = (chunk_id + 1) * self._batch_size if (maximum > len(self.dataframes)): maximum = len(self.dataframes) for i in range(chunk_id * self._batch_size, maximum): img_name = self.dataframes.iloc[i, 0] image = Image.open(img_name) cl = self.dataframes.iloc[i, 1:].values[0] image = self._scale_image(image) image = np.moveaxis((np.array(image).astype('float32')), -1, 0) image -= np.mean(image, keepdims=True) image /= (np.std(image, keepdims=True) + 1e-6) images.append(image) yv = np.zeros(num_classes) yv[classes.index(cl)] = 1 labels.append(yv) result = {} features = np.array(images) lab = np.array(labels).astype('float32') result[self._streams[0].m_name] = features result[self._streams[1].m_name] = lab return result try: with open(map_file_train) as f: csv_reader = csv.reader(f, delimiter='\t') for row in csv_reader: cmd = row[1] classes_set.add(cmd) num_train = num_train + 1 except Exception as e: raise Exception( "No train_map.txt file found in path " + path_to_folder + ". Did you create a dataset using create_balanced_dataset()?") num_classes = len(classes) with open(map_file_test) as f: for num_test, l in enumerate(f): pass # transforms = [ # xforms.scale(width=self.__image_width, height=self.__image_height, channels=num_channels, interpolations='linear'), # xforms.mean(mean_file) # ] dataset_train = TrackDataset(map_file=map_file_train, streams=[ dict(name='features', shape=(num_channels, self.__image_height, self.__image_width)), dict(name='labels', shape=(num_classes, )) ]) reader_train = MinibatchSource([dataset_train], randomize=True) # a = dataset_train.num_chunks() dataset_test = TrackDataset(map_file=map_file_test, streams=[ dict(name='features', shape=(num_channels, self.__image_height, self.__image_width)), dict(name='labels', shape=(num_classes, )) ]) reader_test = MinibatchSource([dataset_test], randomize=True) # ImageDeserializer loads images in the BGR format, not RGB # reader_train = MinibatchSource(ImageDeserializer(map_file_train, StreamDefs( # features = StreamDef(field='image', transforms=transforms), # labels = StreamDef(field='label', shape=num_classes) # ))) # reader_test = MinibatchSource(ImageDeserializer(map_file_test, StreamDefs( # features = StreamDef(field='image', transforms=transforms), # labels = StreamDef(field='label', shape=num_classes) # ))) # mb = reader_train.next_minibatch(10) input_var = input_variable( (num_channels, self.__image_height, self.__image_width)) label_var = input_variable((num_classes)) model = model_definition(input_var) ce = cross_entropy_with_softmax(model, label_var) pe = classification_error(model, label_var) epoch_size = num_train lr_per_minibatch = learning_parameter_schedule([0.01] * 10 + [0.003] * 10 + [0.001], epoch_size=epoch_size) momentums = momentum_schedule(0.9, minibatch_size=minibatch_size) l2_reg_weight = 0.001 learner = momentum_sgd(model.parameters, lr=lr_per_minibatch, momentum=momentums, l2_regularization_weight=l2_reg_weight) progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs) trainer = cntk.train.Trainer(model, (ce, pe), [learner], [progress_printer]) input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } print("Training started") batch_index = 0 plot_data = {'batchindex': [], 'loss': [], 'error': []} for epoch in range(epochs): sample_count = 0 while sample_count < epoch_size: data: MinibatchSource = reader_train.next_minibatch( min(minibatch_size, epoch_size - sample_count), input_map=input_map) trainer.train_minibatch(data) sample_count += data[label_var].num_samples batch_index += 1 plot_data['batchindex'].append(batch_index) plot_data['loss'].append( trainer.previous_minibatch_loss_average) plot_data['error'].append( trainer.previous_minibatch_evaluation_average) trainer.summarize_training_progress() metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 epoch_size = num_test while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) data = reader_test.next_minibatch(current_minibatch, input_map=input_map) metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format( minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom)) print("") model.save(output_model_path, format=ModelFormat.ONNX)
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs from cntk.ops import input_variable from cntk.layers import Dense from cntk.ops import relu from cntk.metrics import classification_error from cntk.losses import cross_entropy_with_softmax path = "dataset.txt" featuresShapeValue = 6 labelsShapeValue = 1 featuresShape = input_variable(featuresShapeValue) labelsShape = input_variable(labelsShapeValue) ctfdResult = CTFDeserializer( path, StreamDefs(features=StreamDef(field='features', shape=featuresShapeValue), labels=StreamDef(field='labels', shape=labelsShapeValue))) reader = MinibatchSource(ctfdResult) hiddenLayerDimension = 4 hiddenLayerOne = Dense(hiddenLayerDimension, activation=relu)(featuresShape) outputLayer = Dense(labelsShapeValue, activation=relu)(hiddenLayerOne) crossEntropy = cross_entropy_with_softmax(outputLayer, labelsShape) classificationError = classification_error(outputLayer, labelsShape)
def simple_mnist(tensorboard_logdir=None): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data feature = input(input_dim, np.float32) label = input(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), feature) z = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST") path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt")) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { feature: reader_train.streams.features, label: reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. #training_progress_output_freq = 100 progress_writers = [ ProgressPrinter( #freq=training_progress_output_freq, tag='Training', num_epochs=num_sweeps_to_train_with) ] if tensorboard_logdir is not None: progress_writers.append( TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)) # Instantiate the trainer object to drive the model training trainer = Trainer(z, (ce, pe), adadelta(z.parameters), progress_writers) training_session(trainer=trainer, mb_source=reader_train, mb_size=minibatch_size, var_to_stream=input_map, max_samples=num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep).train() # Load test data path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt")) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { feature: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
input_sequence = sequence.input_variable(shape=vocab_size) label_sequence = sequence.input_variable(shape=vocab_size) model = Sequential([ For( range(2), lambda: Sequential( [Stabilizer(), Recurrence(LSTM(256), go_backwards=False)])), Dense(vocab_size) ]) z = model(input_sequence) z_sm = cntk.softmax(z) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) lr_per_sample = learning_rate_schedule(0.001, UnitType.sample) momentum_time_constant = momentum_as_time_constant_schedule(1100) clipping_threshold_per_sample = 5.0 gradient_clipping_with_truncation = True learner = momentum_sgd( z.parameters, lr_per_sample, momentum_time_constant, gradient_clipping_threshold_per_sample=clipping_threshold_per_sample, gradient_clipping_with_truncation=gradient_clipping_with_truncation) progress_printer = ProgressPrinter(freq=100, tag='Training') trainer = Trainer(z, (ce, errs), learner, progress_printer)
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True, proposal_layer_param_string=None): ''' Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper: Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun: "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" Outputs object detection proposals by applying estimated bounding-box transformations to a set of regular boxes (called "anchors"). Args: conv_out: The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image. im_info: (image_widht, image_height, image_scale) as CNTK variable or constant add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer. Returns: rpn_rois - the proposed ROIs rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness) ''' # RPN network # init = 'normal', initValueScale = 0.01, initBias = 0.1 rpn_conv_3x3 = Convolution((3, 3), 256, activation=relu, pad=True, strides=1, init = normal(scale=0.01), init_bias=0.1)(conv_out) rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score", init = normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3) # 2(bg/fg) * 9(anchors) rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred", init = normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3) # 4(coords) * 9(anchors) # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W) num_predictions = int(np.prod(rpn_cls_score.shape) / 2) rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions)) rpn_cls_prob = softmax(rpn_cls_score_rshp, axis=0, name="objness_softmax") rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape) # proposal layer rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string)) rpn_rois = alias(rpn_rois_raw, name='rpn_rois') rpn_losses = None if(add_loss_functions): # RPN targets # Comment: rpn_cls_score is only passed vvv to get width and height of the conv feature map ... atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string)) rpn_labels = atl.outputs[0] rpn_bbox_targets = atl.outputs[1] rpn_bbox_inside_weights = atl.outputs[2] # For loss functions: ignore label predictions for the 'ignore label', # i.e. set target and prediction to 0 --> needs to be softmaxed before rpn_labels_rshp = reshape(rpn_labels, (1, num_predictions)) ignore = user_function(IgnoreLabel(rpn_cls_prob, rpn_labels_rshp, ignore_label=-1)) rpn_cls_prob_ignore = ignore.outputs[0] fg_targets = ignore.outputs[1] bg_targets = 1 - fg_targets rpn_labels_ignore = splice(bg_targets, fg_targets, axis=0) # RPN losses rpn_loss_cls = cross_entropy_with_softmax(rpn_cls_prob_ignore, rpn_labels_ignore, axis=0) rpn_loss_bbox = user_function(SmoothL1Loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights)) rpn_losses = plus(reduce_sum(rpn_loss_cls), reduce_sum(rpn_loss_bbox), name="rpn_losses") return rpn_rois, rpn_losses