def create_resnet_network(network_name): # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # create model, and configure learning parameters if network_name == 'resnet20': z = create_cifar10_model(input_var, 3, num_classes) elif network_name == 'resnet110': z = create_cifar10_model(input_var, 18, num_classes) else: return RuntimeError("Unknown model name!") # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) return { 'name' : network_name, 'feature': input_var, 'label': label_var, 'ce' : ce, 'pe' : pe, 'output': z }
def criterion(input, labels): # criterion function must drop the <s> from the labels postprocessed_labels = sequence.slice(labels, 1, 0) # <s> A B C </s> --> A B C </s> z = model(input, postprocessed_labels) ce = cross_entropy_with_softmax(z, postprocessed_labels) errs = classification_error (z, postprocessed_labels) return (ce, errs)
def train(reader, model, max_epochs): # Input variables denoting the features and label data query = Input(input_dim, is_sparse=False) slot_labels = Input(num_labels, is_sparse=True) # TODO: make sparse once it works # apply model to input z = model(query) # loss and metric ce = cross_entropy_with_softmax(z, slot_labels) pe = classification_error (z, slot_labels) # training config epoch_size = 36000 minibatch_size = 70 num_mbs_to_show_result = 100 momentum_time_constant = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)) # TODO: Change to round number. This is 664.39. 700? lr_schedule = [0.003]*2+[0.0015]*12+[0.0003] # LR schedule over epochs (we don't run that many epochs, but if we did, these are good values) # trainer object lr_per_sample = learning_rate_schedule(lr_schedule, UnitType.sample, epoch_size) learner = adam_sgd(z.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=True, low_memory=True, gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True) trainer = Trainer(z, (ce, pe), [learner]) # define mapping from reader streams to network inputs input_map = { query : reader.streams.query, slot_labels : reader.streams.slot_labels } # process minibatches and perform model training log_number_of_parameters(z) ; print() # more detailed logging progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', tensorboard_log_dir='atis_log', model=z) #progress_printer = ProgressPrinter(tag='Training') t = 0 for epoch in range(max_epochs): # loop over epochs epoch_end = (epoch+1) * epoch_size while t < epoch_end: # loop over minibatches on the epoch # BUGBUG? The change of minibatch_size parameter vv has no effect. data = reader.next_minibatch(min(minibatch_size, epoch_end-t), input_map=input_map) # fetch minibatch trainer.train_minibatch(data) # update model with it t += trainer.previous_minibatch_sample_count # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress #def trace_node(name): # nl = [n for n in z.parameters if n.name() == name] # if len(nl) > 0: # print (name, np.asarray(nl[0].value)) #trace_node('W') #trace_node('stabilizer_param') loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True) return loss, metric
def train_lm(training_file, epochs, max_num_minibatches): # load the data and vocab data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file) # Model the source and target inputs to the model input_sequence, label_sequence = create_inputs(vocab_dim) # create the model model = create_model(vocab_dim) # and apply it to the input sequence z = model(input_sequence) # setup the criterions (loss and metric) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # Instantiate the trainer object to drive the model training lr_per_sample = learning_rate_schedule(0.001, UnitType.sample) momentum_time_constant = momentum_as_time_constant_schedule(1100) clipping_threshold_per_sample = 5.0 gradient_clipping_with_truncation = True learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, gradient_clipping_threshold_per_sample=clipping_threshold_per_sample, gradient_clipping_with_truncation=gradient_clipping_with_truncation) progress_printer = ProgressPrinter(freq=100, tag='Training') trainer = Trainer(z, (ce, errs), learner, progress_printer) sample_freq = 1000 minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs) # print out some useful training information log_number_of_parameters(z) print ("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch)) print() for e in range(0, epochs): # Specify the mapping of input variables in the model to actual minibatch data to be trained with # If it's the start of the data, we specify that we are looking at a new sequence (True) mask = [True] for b in range(0, minibatches_per_epoch): # get the data features, labels = get_data(b, minibatch_size, data, char_to_ix, vocab_dim) arguments = ({input_sequence : features, label_sequence : labels}, mask) mask = [False] trainer.train_minibatch(arguments) global_minibatch = e*minibatches_per_epoch + b if global_minibatch % sample_freq == 0: print(sample(z, ix_to_char, vocab_dim, char_to_ix)) model_filename = "models/shakespeare_epoch%d.dnn" % (e+1) z.save_model(model_filename) print("Saved model to '%s'" % model_filename)
def train_fast_rcnn(debug_output=False): if debug_output: print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output")) # Create the minibatch source minibatch_source = create_mb_source(image_height, image_width, num_channels, num_classes, num_rois, base_path, "train") # Input variables denoting features, rois and label data image_input = input_variable((num_channels, image_height, image_width)) roi_input = input_variable((num_rois, 4)) label_input = input_variable((num_rois, num_classes)) # define mapping from reader streams to network inputs input_map = { image_input: minibatch_source[features_stream_name], roi_input: minibatch_source[roi_stream_name], label_input: minibatch_source[label_stream_name] } # Instantiate the Fast R-CNN prediction model and loss function frcn_output = frcn_predictor(image_input, roi_input, num_classes) ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1) pe = classification_error(frcn_output, label_input, axis=1) if debug_output: plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png")) # Set learning parameters l2_reg_weight = 0.0005 lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001] lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample) mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant) # Instantiate the trainer object learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight) trainer = Trainer(frcn_output, (ce, pe), learner) # Get minibatches of images and perform model training print("Training Fast R-CNN model for %s epochs." % max_epochs) log_number_of_parameters(frcn_output) progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs) for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map) trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) if debug_output: frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1))) return frcn_output
def train_sequence_classifier(debug_output=False): input_dim = 2000 cell_dim = 25 hidden_dim = 25 embedding_dim = 50 num_output_classes = 5 # Input variables denoting the features and label data features = input_variable(shape=input_dim, is_sparse=True) label = input_variable(num_output_classes, dynamic_axes=[ Axis.default_batch_axis()]) # Instantiate the sequence classification model classifier_output = LSTM_sequence_classifer_net( features, num_output_classes, embedding_dim, hidden_dim, cell_dim) ce = cross_entropy_with_softmax(classifier_output, label) pe = classification_error(classifier_output, label) rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) reader = create_reader(path, True, input_dim, num_output_classes) input_map = { features : reader.streams.features, label : reader.streams.labels } lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample) # Instantiate the trainer object to drive the model training trainer = Trainer(classifier_output, (ce, pe), sgd(classifier_output.parameters, lr=lr_per_sample)) # Get minibatches of sequences to train with and perform model training minibatch_size = 200 training_progress_output_freq = 10 if debug_output: training_progress_output_freq = training_progress_output_freq/3 for i in range(251): mb = reader.next_minibatch(minibatch_size, input_map=input_map) trainer.train_minibatch(mb) print_training_progress(trainer, i, training_progress_output_freq) import copy evaluation_average = copy.copy( trainer.previous_minibatch_evaluation_average) loss_average = copy.copy(trainer.previous_minibatch_loss_average) return evaluation_average, loss_average
def train_sequence_classifier(): input_dim = 2000; cell_dim = 25; hidden_dim = 25; embedding_dim = 50; num_output_classes = 5; # Input variables denoting the features and label data features = input_variable(shape=input_dim, is_sparse=True) label = input_variable(num_output_classes, dynamic_axes = [Axis.default_batch_axis()]) # Instantiate the sequence classification model classifier_output = LSTM_sequence_classifer_net(features, num_output_classes, embedding_dim, hidden_dim, cell_dim) ce = cross_entropy_with_softmax(classifier_output, label) pe = classification_error(classifier_output, label) rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration( feature_stream_name, input_dim, True, 'x' ), StreamConfiguration( labels_stream_name, num_output_classes, False, 'y')], 0) features_si = mb_source.stream_info(features) labels_si = mb_source.stream_info(label) # Instantiate the trainer object to drive the model training lr = lr = learning_rates_per_sample(0.0005) trainer = Trainer(classifier_output, ce, pe, [sgd_learner(classifier_output.owner.parameters(), lr)]) # Get minibatches of sequences to train with and perform model training minibatch_size = 200 training_progress_output_freq = 10 i = 0; while True: mb = mb_source.get_next_minibatch(minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual minibatch data to be trained with arguments = {features : mb[features_si].m_data, label : mb[labels_si].m_data} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) i += 1
def simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant((), 0.00390625), input) netout = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) rel_path = os.path.join(*"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt".split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) if not os.path.exists(path): readme_file = os.path.normpath(os.path.join(os.path.dirname(path), "..", "README.md")) raise RuntimeError("File '%s' does not exist. Please follow the instructions at %s to download and prepare it."%(path, readme_file)) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration( feature_stream_name, input_dim ), StreamConfiguration( labels_stream_name, num_output_classes) ]) features_si = mb_source.stream_info(feature_stream_name) labels_si = mb_source.stream_info(labels_stream_name) # Instantiate the trainer object to drive the model training lr = learning_rates_per_sample(0.003125) trainer = Trainer(netout, ce, pe, [sgd_learner(netout.owner.parameters(), lr)]) # Get minibatches of images to train with and perform model training minibatch_size = 32 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 1 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 20 for i in range(0, int(num_minibatches_to_train)): mb = mb_source.get_next_minibatch(minibatch_size) # Specify the mapping of input variables in the model to actual minibatch data to be trained with arguments = {input : mb[features_si].m_data, label : mb[labels_si].m_data} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq)
def train_model(base_model_file, feature_node_name, last_hidden_node_name, image_width, image_height, num_channels, num_classes, train_map_file, num_epochs, max_images=-1, freeze=False): epoch_size = sum(1 for line in open(train_map_file)) if max_images > 0: epoch_size = min(epoch_size, max_images) # Create the minibatch source and input variables minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes) image_input = input_variable((num_channels, image_height, image_width)) label_input = input_variable(num_classes) # Define mapping from reader streams to network inputs input_map = { image_input: minibatch_source[features_stream_name], label_input: minibatch_source[label_stream_name] } # Instantiate the transfer learning model and loss function tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze) ce = cross_entropy_with_softmax(tl_model, label_input) pe = classification_error(tl_model, label_input) # Instantiate the trainer object lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch) mm_schedule = momentum_schedule(momentum_per_mb) learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight) trainer = Trainer(tl_model, (ce, pe), learner) # Get minibatches of images and perform model training print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size)) log_number_of_parameters(tl_model) progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs) for epoch in range(num_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map) trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress if sample_count % (100 * mb_size) == 0: print ("Processed {0} samples".format(sample_count)) progress_printer.epoch_summary(with_metric=True) return tl_model
def ffnet(debug_output=False): input_dim = 2 num_output_classes = 2 num_hidden_layers = 2 hidden_layers_dim = 50 # Input variables denoting the features and label data input = input_variable((input_dim), np.float32) label = input_variable((num_output_classes), np.float32) # Instantiate the feedforward classification model netout = fully_connected_classifier_net( input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) # Instantiate the trainer object to drive the model training trainer = Trainer(netout, ce, pe, [sgd(netout.parameters(), lr=0.02)]) # Get minibatches of training data and perform model training minibatch_size = 25 num_samples_per_sweep = 10000 num_sweeps_to_train_with = 2 num_minibatches_to_train = ( num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 60 if debug_output: training_progress_output_freq = training_progress_output_freq/3 for i in range(0, int(num_minibatches_to_train)): features, labels = generate_random_data( minibatch_size, input_dim, num_output_classes) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with trainer.train_minibatch({input: features, label: labels}) print_training_progress(trainer, i, training_progress_output_freq) test_features, test_labels = generate_random_data( minibatch_size, input_dim, num_output_classes) avg_error = trainer.test_minibatch( {input: test_features, label: test_labels}) return avg_error
def create_recurrent_network(): # Input variables denoting the features and label data features = input_variable(((2*context+1)*feature_dim)) labels = input_variable((num_classes)) # create network model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))), Dense(num_classes)]) z = model(features) ce = cross_entropy_with_softmax(z, labels) errs = classification_error (z, labels) return { 'feature': features, 'label': labels, 'ce' : ce, 'errs' : errs, 'output': z }
def ffnet(): input_dim = 2 num_output_classes = 2 num_hidden_layers = 2 hidden_layers_dim = 50 # Input variables denoting the features and label data input = input_variable((input_dim), np.float32) label = input_variable((num_output_classes), np.float32) # Instantiate the feedforward classification model netout = fully_connected_classifier_net( input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) lr_per_minibatch=learning_rate_schedule(0.5, UnitType.minibatch) # Instantiate the trainer object to drive the model training learner = sgd(netout.parameters, lr=lr_per_minibatch) progress_printer = ProgressPrinter(128) trainer = Trainer(netout, (ce, pe), learner, progress_printer) # Get minibatches of training data and perform model training minibatch_size = 25 for i in range(1024): features, labels = generate_random_data( minibatch_size, input_dim, num_output_classes) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with trainer.train_minibatch({input: features, label: labels}) trainer.summarize_training_progress() test_features, test_labels = generate_random_data( minibatch_size, input_dim, num_output_classes) avg_error = trainer.test_minibatch( {input: test_features, label: test_labels}) return avg_error
def cifar_resnet(): image_height = 32 image_width = 32 num_channels = 3 num_classes = 10 feats_stream_name = 'features' labels_stream_name = 'labels' minibatch_source = create_mb_source(feats_stream_name, labels_stream_name, image_height, image_width, num_channels, num_classes) features_si = minibatch_source.stream_info(feats_stream_name) labels_si = minibatch_source.stream_info(labels_stream_name) # Input variables denoting the features and label data image_input = input_variable((num_channels, image_height, image_width), features_si.m_element_type) label_var = input_variable((num_classes), features_si.m_element_type) # Instantiate the resnet classification model classifier_output = resnet_classifer(image_input, num_classes) ce = cross_entropy_with_softmax(classifier_output, label_var) pe = classification_error(classifier_output, label_var) # Instantiate the trainer object to drive the model training lr = learning_rates_per_sample(0.0078125) trainer = Trainer(classifier_output, ce, pe, [sgd_learner(classifier_output.owner.parameters(), lr)]) # Get minibatches of images to train with and perform model training mb_size = 32 training_progress_output_freq = 20 num_mbs = 1000 for i in range(0, num_mbs): mb=minibatch_source.get_next_minibatch(mb_size) # Specify the mapping of input variables in the model to actual minibatch data to be trained with arguments = {image_input : mb[features_si].m_data, label_var : mb[labels_si].m_data} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq)
def simple_mnist(tensorboard_logdir=None): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input) z = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST") path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt")) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { input: reader_train.streams.features, label: reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. #training_progress_output_freq = 100 progress_writers = [ ProgressPrinter( #freq=training_progress_output_freq, tag='Training', num_epochs=num_sweeps_to_train_with) ] if tensorboard_logdir is not None: progress_writers.append( TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)) # Instantiate the trainer object to drive the model training lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch) trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch), progress_writers) training_session(trainer=trainer, mb_source=reader_train, mb_size=minibatch_size, var_to_stream=input_map, max_samples=num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep).train() # Load test data path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt")) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { input: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def train_and_evaluate(reader_train, reader_test, network_name): set_computation_network_trace_level(0) # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # create model, and configure learning parameters if network_name == 'resnet20': z = create_cifar10_model(input_var, 3, num_classes) lr_per_mb = [1.0]*80+[0.1]*40+[0.01] elif network_name == 'resnet110': z = create_cifar10_model(input_var, 18, num_classes) lr_per_mb = [0.1]*1+[1.0]*80+[0.1]*40+[0.01] else: return RuntimeError("Unknown model name!") # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # shared training parameters epoch_size = 50000 # for now we manually specify epoch size minibatch_size = 128 max_epochs = 160 momentum_time_constant = -minibatch_size/np.log(0.9) l2_reg_weight = 0.0001 # Set learning parameters lr_per_sample = [lr/minibatch_size for lr in lr_per_mb] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size) mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant) # trainer object learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight = l2_reg_weight) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) ; print() progress_printer = ProgressPrinter(tag='Training') # perform model training for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += data[label_var].num_samples # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) persist.save_model(z, os.path.join(model_path, network_name + "_{}.dnn".format(epoch))) # Evaluation parameters epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)) print("") return metric_numer/metric_denom
def convnet_cifar10(debug_output=False): set_computation_network_trace_level(0) image_height = 32 image_width = 32 num_channels = 3 input_dim = image_height * image_width * num_channels num_output_classes = 10 # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width), np.float32) label_var = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model input_removemean = minus(input_var, constant(128)) scaled_input = element_times(constant(0.00390625), input_removemean) with default_options (activation=relu, pad=True): z = Sequential([ LayerStack(2, lambda : [ Convolution((3,3), 64), Convolution((3,3), 64), MaxPooling((3,3), (2,2)) ]), LayerStack(2, lambda i: [ Dense([256,128][i]), Dropout(0.5) ]), Dense(num_output_classes, activation=None) ])(scaled_input) ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) reader_train = create_reader(os.path.join(data_path, 'Train_cntk_text.txt'), True, input_dim, num_output_classes) # training config epoch_size = 50000 # for now we manually specify epoch size minibatch_size = 64 # Set learning parameters lr_per_sample = [0.0015625]*10+[0.00046875]*10+[0.00015625] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size) momentum_time_constant = [0]*20+[-minibatch_size/np.log(0.9)] mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size) l2_reg_weight = 0.002 # Instantiate the trainer object to drive the model training learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight = l2_reg_weight) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var : reader_train.streams.features, label_var : reader_train.streams.labels } log_number_of_parameters(z) ; print() progress_printer = ProgressPrinter(tag='Training') # Get minibatches of images to train with and perform model training max_epochs = 30 for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += data[label_var].num_samples # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) persist.save_model(z, os.path.join(model_path, "ConvNet_CIFAR10_{}.dnn".format(epoch))) # Load test data reader_test = create_reader(os.path.join(data_path, 'Test_cntk_text.txt'), False, input_dim, num_output_classes) input_map = { input_var : reader_test.streams.features, label_var : reader_test.streams.labels } # Test data for trained model epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)) print("") return metric_numer/metric_denom
def create_network(input_vocab_dim, label_vocab_dim): # network complexity; initially low for faster testing hidden_dim = 256 num_layers = 1 # Source and target inputs to the model batch_axis = Axis.default_batch_axis() input_seq_axis = Axis('inputAxis') label_seq_axis = Axis('labelAxis') input_dynamic_axes = [batch_axis, input_seq_axis] raw_input = input_variable( shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input') label_dynamic_axes = [batch_axis, label_seq_axis] raw_labels = input_variable( shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes, name='raw_labels') # Instantiate the sequence to sequence translation model input_sequence = raw_input # Drop the sentence start token from the label, for decoder training label_sequence = sequence.slice(raw_labels, 1, 0) # <s> A B C </s> --> A B C </s> label_sentence_start = sequence.first(raw_labels) # <s> is_first_label = sequence.is_first(label_sequence) # <s> 0 0 0 ... label_sentence_start_scattered = sequence.scatter( label_sentence_start, is_first_label) # Encoder encoder_outputH = stabilize(input_sequence) for i in range(0, num_layers): (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value) thought_vectorH = sequence.first(encoder_outputH) thought_vectorC = sequence.first(encoder_outputC) thought_vector_broadcastH = sequence.broadcast_as( thought_vectorH, label_sequence) thought_vector_broadcastC = sequence.broadcast_as( thought_vectorC, label_sequence) # Decoder decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value( decoder_history_hook)) decoder_outputH = stabilize(decoder_input) for i in range(0, num_layers): if (i > 0): recurrence_hookH = past_value recurrence_hookC = past_value else: isFirst = sequence.is_first(label_sequence) recurrence_hookH = lambda operand: element_select( isFirst, thought_vector_broadcastH, past_value(operand)) recurrence_hookC = lambda operand: element_select( isFirst, thought_vector_broadcastC, past_value(operand)) (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC) decoder_output = decoder_outputH # Softmax output layer z = linear_layer(stabilize(decoder_output), label_vocab_dim) # Criterion nodes ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # network output for decoder history net_output = hardmax(z) # make a clone of the graph where the ground truth is replaced by the network output ng = z.clone(CloneMethod.share, {decoder_history_hook.output : net_output.output}) return { 'raw_input' : raw_input, 'raw_labels' : raw_labels, 'ce' : ce, 'pe' : errs, 'ng' : ng, 'output': z }
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None, model_dir=None, tensorboard_logdir=None): set_computation_network_trace_level(0) # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # create model, and configure learning parameters if network_name == 'resnet20': z = create_cifar10_model(input_var, 3, num_classes) lr_per_mb = [1.0]*80+[0.1]*40+[0.01] elif network_name == 'resnet110': z = create_cifar10_model(input_var, 18, num_classes) lr_per_mb = [0.1]*1+[1.0]*80+[0.1]*40+[0.01] else: return RuntimeError("Unknown model name!") # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # shared training parameters minibatch_size = 128 momentum_time_constant = -minibatch_size/np.log(0.9) l2_reg_weight = 0.0001 # Set learning parameters lr_per_sample = [lr/minibatch_size for lr in lr_per_mb] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample) mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant) # progress writers progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)] tensorboard_writer = None if tensorboard_logdir is not None: tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z) progress_writers.append(tensorboard_writer) # trainer object learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight = l2_reg_weight) trainer = Trainer(z, (ce, pe), learner, progress_writers) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) ; print() # perform model training if profiler_dir: start_profiler(profiler_dir, True) for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far trainer.summarize_training_progress() # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed. if tensorboard_writer: for parameter in z.parameters: tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch) if model_dir: z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch))) enable_profiler() # begin to collect profiler data after first epoch if profiler_dir: stop_profiler() # Evaluation parameters test_epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 while sample_count < test_epoch_size: current_minibatch = min(minibatch_size, test_epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples print("") trainer.summarize_test_progress() print("") return metric_numer/metric_denom
def simple_mnist(debug_output=False): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input) z = fully_connected_classifier_net( scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) try: rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join(abs_path, "..", "..", "..", "..", "..", "Examples", "Image", "DataSets", "MNIST", "Train-28x28_cntk_text.txt") path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { input : reader_train.streams.features, label : reader_train.streams.labels } lr_per_minibatch=learning_rate_schedule(0.2, UnitType.minibatch) # Instantiate the trainer object to drive the model training trainer = Trainer(z, ce, pe, sgd(z.parameters, lr=lr_per_minibatch)) # Get minibatches of images to train with and perform model training minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 500 if debug_output: training_progress_output_freq = training_progress_output_freq/4 for i in range(0, int(num_minibatches_to_train)): mb = reader_train.next_minibatch(minibatch_size, input_map=input_map) trainer.train_minibatch(mb) print_training_progress(trainer, i, training_progress_output_freq) # Load test data try: rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join(abs_path, "..", "..", "..", "..", "..", "Examples", "Image", "DataSets", "MNIST", "Test-28x28_cntk_text.txt") path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { input : reader_test.streams.features, label : reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def criterion(query, labels): z = model(query) ce = cross_entropy_with_softmax(z, labels) errs = classification_error(z, labels) return (ce, errs)
def sequence_to_sequence_translator(debug_output=False): input_vocab_dim = 69 label_vocab_dim = 69 hidden_dim = 512 num_layers = 2 # Source and target inputs to the model batch_axis = Axis.default_batch_axis() input_seq_axis = Axis('inputAxis') label_seq_axis = Axis('labelAxis') input_dynamic_axes = [batch_axis, input_seq_axis] raw_input = input_variable( shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes) label_dynamic_axes = [batch_axis, label_seq_axis] raw_labels = input_variable( shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes) # Instantiate the sequence to sequence translation model input_sequence = raw_input # Drop the sentence start token from the label, for decoder training label_sequence = slice(raw_labels, label_seq_axis, 1, 0) label_sentence_start = sequence.first(raw_labels) is_first_label = sequence.is_first(label_sequence) label_sentence_start_scattered = sequence.scatter( label_sentence_start, is_first_label) # Encoder encoder_outputH = stabilize(input_sequence) for i in range(0, num_layers): (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value) thought_vectorH = sequence.first(encoder_outputH) thought_vectorC = sequence.first(encoder_outputC) thought_vector_broadcastH = sequence.broadcast_as( thought_vectorH, label_sequence) thought_vector_broadcastC = sequence.broadcast_as( thought_vectorC, label_sequence) # Decoder decoder_history_from_ground_truth = label_sequence decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value( decoder_history_from_ground_truth)) decoder_outputH = stabilize(decoder_input) for i in range(0, num_layers): if (i > 0): recurrence_hookH = past_value recurrence_hookC = past_value else: isFirst = sequence.is_first(label_sequence) recurrence_hookH = lambda operand: element_select( isFirst, thought_vector_broadcastH, past_value(operand)) recurrence_hookC = lambda operand: element_select( isFirst, thought_vector_broadcastC, past_value(operand)) (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC) decoder_output = decoder_outputH decoder_dim = hidden_dim # Softmax output layer z = linear_layer(stabilize(decoder_output), label_vocab_dim) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # Instantiate the trainer object to drive the model training lr = 0.007 momentum_time_constant = 1100 m_schedule = momentum_schedule(momentum_time_constant) clipping_threshold_per_sample = 2.3 gradient_clipping_with_truncation = True trainer = Trainer(z, ce, errs, [momentum_sgd(z.parameters, lr, m_schedule, clipping_threshold_per_sample, gradient_clipping_with_truncation)]) rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'), StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1')], 10000) features_si = mb_source[feature_stream_name] labels_si = mb_source[labels_stream_name] # Get minibatches of sequences to train with and perform model training minibatch_size = 72 training_progress_output_freq = 30 if debug_output: training_progress_output_freq = training_progress_output_freq/3 while True: mb = mb_source.next_minibatch(minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = {raw_input: mb[features_si], raw_labels: mb[labels_si]} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) i += 1 rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.test.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) test_mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'), StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1')], 10000, False) features_si = test_mb_source[feature_stream_name] labels_si = test_mb_source[labels_stream_name] # choose this to be big enough for the longest sentence train_minibatch_size = 1024 # Get minibatches of sequences to test and perform testing i = 0 total_error = 0.0 while True: mb = test_mb_source.next_minibatch(train_minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual # minibatch data to be tested with arguments = {raw_input: mb[features_si], raw_labels: mb[labels_si]} mb_error = trainer.test_minibatch(arguments) total_error += mb_error if debug_output: print("Minibatch {}, Error {} ".format(i, mb_error)) i += 1 # Average of evaluation errors of all test minibatches return total_error / i
def simple_mnist(debug_output=False): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input) z = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST") path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt")) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { input: reader_train.streams.features, label: reader_train.streams.labels } lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch) # Instantiate the trainer object to drive the model training trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch)) # Get minibatches of images to train with and perform model training minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 500 if debug_output: training_progress_output_freq = training_progress_output_freq / 4 for i in range(0, int(num_minibatches_to_train)): mb = reader_train.next_minibatch(minibatch_size, input_map=input_map) trainer.train_minibatch(mb) print_training_progress(trainer, i, training_progress_output_freq) # Load test data path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt")) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { input: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def train(reader, model, max_epochs): # Input variables denoting the features and label data query = Input(input_dim, is_sparse=False) slot_labels = Input(num_labels, is_sparse=True) # TODO: make sparse once it works # apply model to input z = model(query) # loss and metric ce = cross_entropy_with_softmax(z, slot_labels) pe = classification_error(z, slot_labels) # training config epoch_size = 36000 minibatch_size = 70 num_mbs_to_show_result = 100 momentum_time_constant = momentum_as_time_constant_schedule( minibatch_size / -math.log(0.9)) # TODO: Change to round number. This is 664.39. 700? lr_schedule = [0.003] * 2 + [0.0015] * 12 + [ 0.0003 ] # LR schedule over epochs (we don't run that many epochs, but if we did, these are good values) # trainer object lr_per_sample = learning_rate_schedule(lr_schedule, UnitType.sample, epoch_size) learner = adam_sgd(z.parameters, lr=lr_per_sample, momentum=momentum_time_constant, low_memory=True, gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True) trainer = Trainer(z, ce, pe, [learner]) # define mapping from reader streams to network inputs input_map = { query: reader.streams.query, slot_labels: reader.streams.slot_labels } # process minibatches and perform model training log_number_of_parameters(z) print() progress_printer = ProgressPrinter(freq=100, first=10, tag='Training') # more detailed logging #progress_printer = ProgressPrinter(tag='Training') t = 0 for epoch in range(max_epochs): # loop over epochs epoch_end = (epoch + 1) * epoch_size while t < epoch_end: # loop over minibatches on the epoch # BUGBUG? The change of minibatch_size parameter vv has no effect. data = reader.next_minibatch( min(minibatch_size, epoch_end - t), input_map=input_map) # fetch minibatch trainer.train_minibatch(data) # update model with it t += trainer.previous_minibatch_sample_count # count samples processed so far progress_printer.update_with_trainer( trainer, with_metric=True) # log progress #def trace_node(name): # nl = [n for n in z.parameters if n.name() == name] # if len(nl) > 0: # print (name, np.asarray(nl[0].value)) #trace_node('W') #trace_node('stabilizer_param') loss, metric, actual_samples = progress_printer.epoch_summary( with_metric=True) return loss, metric
def train_and_evaluate(reader_train, reader_test, max_epochs): # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # Normalize the input feature_scale = 1.0 / 256.0 input_var_norm = element_times(feature_scale, input_var) # apply model to input z = create_resnet_model(input_var_norm, 10) # # Training action # # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # training config epoch_size = 50000 minibatch_size = 128 # Set learning parameters lr_per_minibatch = learning_rate_schedule([1] * 80 + [0.1] * 40 + [0.01], epoch_size, UnitType.minibatch) momentum_time_constant = momentum_as_time_constant_schedule( -minibatch_size / np.log(0.9)) l2_reg_weight = 0.0001 # trainer object learner = momentum_sgd(z.parameters, lr=lr_per_minibatch, momentum=momentum_time_constant, l2_regularization_weight=l2_reg_weight) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) print() progress_printer = ProgressPrinter(tag='Training') # perform model training for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch( min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += data[ label_var].num_samples # count samples processed so far progress_printer.update_with_trainer( trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) # # Evaluation action # epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 #progress_printer = ProgressPrinter(freq=100, first=10, tag='Eval') while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format( minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom)) print("") # return evaluation error. return metric_numer / metric_denom
def train_sequence_to_sequence_translator(): input_vocab_dim = 69 label_vocab_dim = 69 hidden_dim = 512 num_layers = 2 # Source and target inputs to the model input_dynamic_axes = [Axis('inputAxis'), Axis.default_batch_axis()] raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes) label_dynamic_axes = [Axis('labelAxis'), Axis.default_batch_axis()] raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes) # Instantiate the sequence to sequence translation model input_sequence = raw_input # Drop the sentence start token from the label, for decoder training label_sequence = slice(raw_labels, label_dynamic_axes[0], 1, 0) label_sentence_start = sequence.first(raw_labels) is_first_label = sequence.is_first(label_sequence) label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label) # Encoder encoder_outputH = stabilize(input_sequence) for i in range(0, num_layers): (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( encoder_outputH, hidden_dim, hidden_dim, future_value, future_value) thought_vectorH = sequence.first(encoder_outputH) thought_vectorC = sequence.first(encoder_outputC) thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence) thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence) # Decoder decoder_history_from_ground_truth = label_sequence decoder_input = element_select( is_first_label, label_sentence_start_scattered, past_value(decoder_history_from_ground_truth)) decoder_outputH = stabilize(decoder_input) for i in range(0, num_layers): if (i == 0): recurrence_hookH = past_value recurrence_hookC = past_value else: isFirst = sequence.is_first(label_sequence) recurrence_hookH = lambda operand: element_select( isFirst, thought_vector_broadcastH, past_value(operand)) recurrence_hookC = lambda operand: element_select( isFirst, thought_vector_broadcastC, past_value(operand)) (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( decoder_outputH, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC) decoder_output = decoder_outputH decoder_dim = hidden_dim # Softmax output layer z = linear_layer(stabilize(decoder_output), label_vocab_dim) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'), StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1') ], 10000) features_si = mb_source.stream_info(feature_stream_name) labels_si = mb_source.stream_info(labels_stream_name) # Instantiate the trainer object to drive the model training lr = learning_rates_per_sample(0.007) momentum_time_constant = 1100 momentum_per_sample = momentums_per_sample( math.exp(-1.0 / momentum_time_constant)) clipping_threshold_per_sample = 2.3 gradient_clipping_with_truncation = True trainer = Trainer(z, ce, errs, [ momentum_sgd_learner(z.owner.parameters(), lr, momentum_per_sample, clipping_threshold_per_sample, gradient_clipping_with_truncation) ]) # Get minibatches of sequences to train with and perform model training minibatch_size = 72 training_progress_output_freq = 10 while True: mb = mb_source.get_next_minibatch(minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual minibatch data to be trained with arguments = { raw_input: mb[features_si].m_data, raw_labels: mb[labels_si].m_data } trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) i += 1
def convnet_mnist(debug_output=False): image_height = 28 image_width = 28 num_channels = 1 input_dim = image_height * image_width * num_channels num_output_classes = 10 # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width), np.float32) label_var = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input_var) with default_options(activation=relu, pad=False): conv1 = Convolution((5, 5), 32, pad=True)(scaled_input) pool1 = MaxPooling((3, 3), (2, 2))(conv1) conv2 = Convolution((3, 3), 48)(pool1) pool2 = MaxPooling((3, 3), (2, 2))(conv2) conv3 = Convolution((3, 3), 64)(pool2) f4 = Dense(96)(conv3) drop4 = Dropout(0.5)(f4) z = Dense(num_output_classes, activation=None)(drop4) ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) reader_train = create_reader( os.path.join(data_path, 'Train-28x28_cntk_text.txt'), True, input_dim, num_output_classes) # training config epoch_size = 60000 # for now we manually specify epoch size minibatch_size = 128 # Set learning parameters lr_per_sample = [0.001] * 10 + [0.0005] * 10 + [0.0001] lr_schedule = learning_rate_schedule(lr_per_sample, UnitType.sample, epoch_size) mm_time_constant = [0] * 5 + [1024] mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size) # Instantiate the trainer object to drive the model training learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) print() progress_printer = ProgressPrinter(tag='Training') # Get minibatches of images to train with and perform model training max_epochs = 40 for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch( min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += data[ label_var].num_samples # count samples processed so far progress_printer.update_with_trainer( trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) persist.save_model( z, os.path.join(model_path, "ConvNet_MNIST_{}.dnn".format(epoch))) # Load test data reader_test = create_reader( os.path.join(data_path, 'Test-28x28_cntk_text.txt'), False, input_dim, num_output_classes) input_map = { input_var: reader_test.streams.features, label_var: reader_test.streams.labels } # Test data for trained model epoch_size = 10000 minibatch_size = 1024 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += trainer.previous_minibatch_sample_count minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format( minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom)) print("") return metric_numer / metric_denom
def sequence_to_sequence_translator(debug_output=False, run_test=False): input_vocab_dim = 69 label_vocab_dim = 69 # network complexity; initially low for faster testing hidden_dim = 256 num_layers = 1 # Source and target inputs to the model batch_axis = Axis.default_batch_axis() input_seq_axis = Axis('inputAxis') label_seq_axis = Axis('labelAxis') input_dynamic_axes = [batch_axis, input_seq_axis] raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input') label_dynamic_axes = [batch_axis, label_seq_axis] raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes, name='raw_labels') # Instantiate the sequence to sequence translation model input_sequence = raw_input # Drop the sentence start token from the label, for decoder training label_sequence = slice(raw_labels, label_seq_axis, 1, 0) # <s> A B C </s> --> A B C </s> label_sentence_start = sequence.first(raw_labels) # <s> is_first_label = sequence.is_first(label_sequence) # <s> 0 0 0 ... label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label) # Encoder encoder_outputH = stabilize(input_sequence) for i in range(0, num_layers): (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value) thought_vectorH = sequence.first(encoder_outputH) thought_vectorC = sequence.first(encoder_outputC) thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence) thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence) # Decoder decoder_history_hook = alias( label_sequence, name='decoder_history_hook') # copy label_sequence decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(decoder_history_hook)) decoder_outputH = stabilize(decoder_input) for i in range(0, num_layers): if (i > 0): recurrence_hookH = past_value recurrence_hookC = past_value else: isFirst = sequence.is_first(label_sequence) recurrence_hookH = lambda operand: element_select( isFirst, thought_vector_broadcastH, past_value(operand)) recurrence_hookC = lambda operand: element_select( isFirst, thought_vector_broadcastC, past_value(operand)) (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC) decoder_output = decoder_outputH # Softmax output layer z = linear_layer(stabilize(decoder_output), label_vocab_dim) # Criterion nodes ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # network output for decoder history net_output = hardmax(z) # make a clone of the graph where the ground truth is replaced by the network output ng = z.clone(CloneMethod.share, {decoder_history_hook.output: net_output.output}) # Instantiate the trainer object to drive the model training lr_per_sample = 0.007 minibatch_size = 72 momentum_time_constant = momentum_as_time_constant_schedule(1100) clipping_threshold_per_sample = 2.3 gradient_clipping_with_truncation = True learner = momentum_sgd( z.parameters, lr_per_sample, momentum_time_constant, gradient_clipping_threshold_per_sample=clipping_threshold_per_sample, gradient_clipping_with_truncation=gradient_clipping_with_truncation) trainer = Trainer(z, ce, errs, learner) # setup data rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf" train_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) valid_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tiny.ctf") # readers randomize_data = True if run_test: randomize_data = False # because we want to get an exact error train_reader = create_reader(train_path, randomize_data, input_vocab_dim, label_vocab_dim) train_bind = { raw_input: train_reader.streams.features, raw_labels: train_reader.streams.labels } # get the vocab for printing output sequences in plaintext rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.mapping" vocab_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) vocab = [w.strip() for w in open(vocab_path).readlines()] i2w = {i: ch for i, ch in enumerate(vocab)} # Get minibatches of sequences to train with and perform model training i = 0 mbs = 0 epoch_size = 908241 max_epochs = 10 training_progress_output_freq = 500 # make things more basic for running a quicker test if run_test: epoch_size = 5000 max_epochs = 1 training_progress_output_freq = 30 valid_reader = create_reader(valid_path, False, input_vocab_dim, label_vocab_dim) valid_bind = { find_arg_by_name('raw_input', ng): valid_reader.streams.features, find_arg_by_name('raw_labels', ng): valid_reader.streams.labels } for epoch in range(max_epochs): loss_numer = 0 metric_numer = 0 denom = 0 while i < (epoch + 1) * epoch_size: # get next minibatch of training data mb_train = train_reader.next_minibatch(minibatch_size, input_map=train_bind) trainer.train_minibatch(mb_train) # collect epoch-wide stats samples = trainer.previous_minibatch_sample_count loss_numer += trainer.previous_minibatch_loss_average * samples metric_numer += trainer.previous_minibatch_evaluation_average * samples denom += samples # every N MBs evaluate on a test sequence to visually show how we're doing if mbs % training_progress_output_freq == 0: mb_valid = valid_reader.next_minibatch(minibatch_size, input_map=valid_bind) e = ng.eval(mb_valid) print_sequences(e, i2w) print_training_progress(trainer, mbs, training_progress_output_freq) i += mb_train[raw_labels].num_samples mbs += 1 print("--- EPOCH %d DONE: loss = %f, errs = %f ---" % (epoch, loss_numer / denom, 100.0 * (metric_numer / denom))) error1 = translator_test_error(z, trainer, input_vocab_dim, label_vocab_dim) save_model(z, "seq2seq.dnn") z = load_model("seq2seq.dnn") label_seq_axis = Axis('labelAxis') label_sequence = slice(find_arg_by_name('raw_labels', z), label_seq_axis, 1, 0) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) trainer = Trainer(z, ce, errs, [ momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, clipping_threshold_per_sample, gradient_clipping_with_truncation) ]) error2 = translator_test_error(z, trainer, input_vocab_dim, label_vocab_dim) assert error1 == error2 return error1
def train_sequence_classifier(debug_output=False): input_dim = 2000 cell_dim = 25 hidden_dim = 25 embedding_dim = 50 num_output_classes = 5 # Input variables denoting the features and label data features = input_variable(shape=input_dim, is_sparse=True) label = input_variable(num_output_classes, dynamic_axes=[Axis.default_batch_axis()]) # Instantiate the sequence classification model classifier_output = LSTM_sequence_classifer_net(features, num_output_classes, embedding_dim, hidden_dim, cell_dim) ce = cross_entropy_with_softmax(classifier_output, label) pe = classification_error(classifier_output, label) rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_dim, True, 'x'), StreamConfiguration(labels_stream_name, num_output_classes, False, 'y') ], FULL_DATA_SWEEP) features_si = mb_source[features] labels_si = mb_source[label] # Instantiate the trainer object to drive the model training trainer = Trainer(classifier_output, ce, pe, [sgd(classifier_output.parameters, lr=0.0005)]) # Get minibatches of sequences to train with and perform model training minibatch_size = 200 training_progress_output_freq = 10 i = 0 if debug_output: training_progress_output_freq = training_progress_output_freq / 3 while True: mb = mb_source.next_minibatch(minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = {features: mb[features_si], label: mb[labels_si]} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) i += 1 import copy evaluation_average = copy.copy( trainer.previous_minibatch_evaluation_average) loss_average = copy.copy(trainer.previous_minibatch_loss_average) return evaluation_average, loss_average
def simple_mnist(debug_output=False): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input) netout = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) try: rel_path = os.path.join( os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join( * "../../../../Examples/Image/Datasets/MNIST/Train-28x28_cntk_text.txt" .split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_dim), StreamConfiguration(labels_stream_name, num_output_classes) ]) features_si = mb_source[feature_stream_name] labels_si = mb_source[labels_stream_name] # Instantiate the trainer object to drive the model training trainer = Trainer(netout, ce, pe, [sgd(netout.parameters, lr=0.003125)]) # Get minibatches of images to train with and perform model training minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 500 if debug_output: training_progress_output_freq = training_progress_output_freq / 4 for i in range(0, int(num_minibatches_to_train)): mb = mb_source.next_minibatch(minibatch_size) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = {input: mb[features_si], label: mb[labels_si]} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) # Load test data try: rel_path = os.path.join( os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join( * "../../../../Examples/Image/Datasets/MNIST/Test-28x28_cntk_text.txt" .split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) test_mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_dim), StreamConfiguration(labels_stream_name, num_output_classes) ], randomize=False) features_si = test_mb_source[feature_stream_name] labels_si = test_mb_source[labels_stream_name] # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = test_mb_source.next_minibatch(test_minibatch_size) # Specify the mapping of input variables in the model to actual # minibatch data to be tested with arguments = {input: mb[features_si], label: mb[labels_si]} eval_error = trainer.test_minibatch(arguments) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def sequence_to_sequence_translator(debug_output=False, run_test=False): input_vocab_dim = 69 label_vocab_dim = 69 # network complexity; initially low for faster testing hidden_dim = 256 num_layers = 1 # Source and target inputs to the model batch_axis = Axis.default_batch_axis() input_seq_axis = Axis('inputAxis') label_seq_axis = Axis('labelAxis') input_dynamic_axes = [batch_axis, input_seq_axis] raw_input = input_variable( shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input') label_dynamic_axes = [batch_axis, label_seq_axis] raw_labels = input_variable( shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes, name='raw_labels') # Instantiate the sequence to sequence translation model input_sequence = raw_input # Drop the sentence start token from the label, for decoder training label_sequence = slice(raw_labels, label_seq_axis, 1, 0) # <s> A B C </s> --> A B C </s> label_sentence_start = sequence.first(raw_labels) # <s> is_first_label = sequence.is_first(label_sequence) # <s> 0 0 0 ... label_sentence_start_scattered = sequence.scatter( label_sentence_start, is_first_label) # Encoder encoder_outputH = stabilize(input_sequence) for i in range(0, num_layers): (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value) thought_vectorH = sequence.first(encoder_outputH) thought_vectorC = sequence.first(encoder_outputC) thought_vector_broadcastH = sequence.broadcast_as( thought_vectorH, label_sequence) thought_vector_broadcastC = sequence.broadcast_as( thought_vectorC, label_sequence) # Decoder decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value( decoder_history_hook)) decoder_outputH = stabilize(decoder_input) for i in range(0, num_layers): if (i > 0): recurrence_hookH = past_value recurrence_hookC = past_value else: isFirst = sequence.is_first(label_sequence) recurrence_hookH = lambda operand: element_select( isFirst, thought_vector_broadcastH, past_value(operand)) recurrence_hookC = lambda operand: element_select( isFirst, thought_vector_broadcastC, past_value(operand)) (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC) decoder_output = decoder_outputH # Softmax output layer z = linear_layer(stabilize(decoder_output), label_vocab_dim) # Criterion nodes ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # network output for decoder history net_output = hardmax(z) # make a clone of the graph where the ground truth is replaced by the network output ng = z.clone(CloneMethod.share, {decoder_history_hook.output : net_output.output}) # Instantiate the trainer object to drive the model training lr = 0.007 minibatch_size = 72 momentum_time_constant = 1100 m_schedule = momentum_schedule(momentum_time_constant) clipping_threshold_per_sample = 2.3 gradient_clipping_with_truncation = True trainer = Trainer(z, ce, errs, [momentum_sgd( z.parameters, lr, m_schedule, clipping_threshold_per_sample, gradient_clipping_with_truncation)]) # setup data rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf" train_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) valid_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tiny.ctf") feature_stream_name = 'features' labels_stream_name = 'labels' # readers randomize_data = True if run_test: randomize_data = False # because we want to get an exact error train_reader = text_format_minibatch_source(train_path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'), StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1') ], randomize=randomize_data) features_si_tr = train_reader.stream_info(feature_stream_name) labels_si_tr = train_reader.stream_info(labels_stream_name) valid_reader = text_format_minibatch_source(valid_path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'), StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1') ], randomize=False) features_si_va = valid_reader.stream_info(feature_stream_name) labels_si_va = valid_reader.stream_info(labels_stream_name) # get the vocab for printing output sequences in plaintext rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.mapping" vocab_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) vocab = [w.strip() for w in open(vocab_path).readlines()] i2w = { i:ch for i,ch in enumerate(vocab) } # Get minibatches of sequences to train with and perform model training i = 0 mbs = 0 epoch_size = 908241 max_epochs = 10 training_progress_output_freq = 500 # make things more basic for running a quicker test if run_test: epoch_size = 5000 max_epochs = 1 training_progress_output_freq = 30 for epoch in range(max_epochs): loss_numer = 0 metric_numer = 0 denom = 0 while i < (epoch+1) * epoch_size: # get next minibatch of training data mb_train = train_reader.next_minibatch(minibatch_size) train_args = {'raw_input': mb_train[features_si_tr], 'raw_labels': mb_train[labels_si_tr]} trainer.train_minibatch(train_args) # collect epoch-wide stats samples = trainer.previous_minibatch_sample_count loss_numer += trainer.previous_minibatch_loss_average * samples metric_numer += trainer.previous_minibatch_evaluation_average * samples denom += samples # every N MBs evaluate on a test sequence to visually show how we're doing if mbs % training_progress_output_freq == 0: mb_valid = valid_reader.next_minibatch(minibatch_size) valid_args = {'raw_input': mb_valid[features_si_va], 'raw_labels': mb_valid[labels_si_va]} e = ng.eval(valid_args) print_sequences(e, i2w) print_training_progress(trainer, mbs, training_progress_output_freq) i += mb_train[labels_si_tr].num_samples mbs += 1 print("--- EPOCH %d DONE: loss = %f, errs = %f ---" % (epoch, loss_numer/denom, 100.0*(metric_numer/denom))) # now setup a test run rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.test.ctf" test_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) test_reader = text_format_minibatch_source(test_path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'), StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1') ], 10000, randomize=False) features_si_te = test_reader.stream_info(feature_stream_name) labels_si_te = test_reader.stream_info(labels_stream_name) test_minibatch_size = 1024 # Get minibatches of sequences to test and perform testing i = 0 total_error = 0.0 while True: mb = test_reader.next_minibatch(test_minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual # minibatch data to be tested with arguments = {raw_input: mb[features_si_te], raw_labels: mb[labels_si_te]} mb_error = trainer.test_minibatch(arguments) total_error += mb_error if debug_output: print("Minibatch {}, Error {} ".format(i, mb_error)) i += 1 # Average of evaluation errors of all test minibatches return total_error / i
def cifar_resnet(base_path, debug_output=False): image_height = 32 image_width = 32 num_channels = 3 num_classes = 10 feats_stream_name = 'features' labels_stream_name = 'labels' minibatch_source = create_mb_source(feats_stream_name, labels_stream_name, image_height, image_width, num_channels, num_classes, base_path) features_si = minibatch_source[feats_stream_name] labels_si = minibatch_source[labels_stream_name] # Input variables denoting the features and label data image_input = input_variable( (num_channels, image_height, image_width), features_si.m_element_type) label_var = input_variable((num_classes), features_si.m_element_type) # Instantiate the resnet classification model classifier_output = resnet_classifer(image_input, num_classes) ce = cross_entropy_with_softmax(classifier_output, label_var) pe = classification_error(classifier_output, label_var) # Instantiate the trainer object to drive the model training trainer = Trainer(classifier_output, ce, pe, [sgd(classifier_output.parameters(), lr=0.0078125)]) # Get minibatches of images to train with and perform model training mb_size = 32 training_progress_output_freq = 60 num_mbs = 1000 if debug_output: training_progress_output_freq = training_progress_output_freq/3 for i in range(0, num_mbs): mb = minibatch_source.get_next_minibatch(mb_size) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = { image_input: mb[features_si], label_var: mb[labels_si] } trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) test_minibatch_source = create_test_mb_source(feats_stream_name, labels_stream_name, image_height, image_width, num_channels, num_classes, base_path) features_si = test_minibatch_source[feats_stream_name] labels_si = test_minibatch_source[labels_stream_name] mb_size = 64 num_mbs = 300 total_error = 0.0 for i in range(0, num_mbs): mb = test_minibatch_source.get_next_minibatch(mb_size) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = { image_input: mb[features_si], label_var: mb[labels_si] } error = trainer.test_minibatch(arguments) total_error += error return total_error / num_mbs
def train_and_evaluate(reader_train, reader_test, max_epochs): # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # apply model to input z = create_vgg9_model(input_var, 10) # # Training action # # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # training config epoch_size = 50000 minibatch_size = 64 # For basic model lr_per_sample = [0.00015625]*10+[0.000046875]*10+[0.0000156] momentum_per_sample = 0.9 ** (1.0 / minibatch_size) # BUGBUG: why does this work? Should be as time const, no? l2_reg_weight = 0.03 # trainer object lr_schedule = learning_rate_schedule(lr_per_sample, units=epoch_size) learner = momentum_sgd(z.parameters, lr_schedule, momentum_per_sample, l2_regularization_weight = l2_reg_weight) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) ; print() progress_printer = ProgressPrinter(tag='Training') # perform model training for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += data[label_var].num_samples # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) # # Evaluation action # epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 #progress_printer = ProgressPrinter(freq=100, first=10, tag='Eval') while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)) print("")
def simple_mnist(tensorboard_logdir=None): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input) z = fully_connected_classifier_net( scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST") path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt")) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { input : reader_train.streams.features, label : reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. #training_progress_output_freq = 100 progress_writers = [ProgressPrinter( #freq=training_progress_output_freq, tag='Training', num_epochs=num_sweeps_to_train_with)] if tensorboard_logdir is not None: progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)) # Instantiate the trainer object to drive the model training lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch) trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch), progress_writers) training_session( trainer=trainer, mb_source = reader_train, mb_size = minibatch_size, var_to_stream = input_map, max_samples = num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep ).train() # Load test data path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt")) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { input : reader_test.streams.features, label : reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant((), 0.00390625), input) netout = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) rel_path = os.path.join( *"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt". split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) if not os.path.exists(path): readme_file = os.path.normpath( os.path.join(os.path.dirname(path), "..", "README.md")) raise RuntimeError( "File '%s' does not exist. Please follow the instructions at %s to download and prepare it." % (path, readme_file)) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_dim), StreamConfiguration(labels_stream_name, num_output_classes) ]) features_si = mb_source.stream_info(feature_stream_name) labels_si = mb_source.stream_info(labels_stream_name) # Instantiate the trainer object to drive the model training lr = learning_rates_per_sample(0.003125) trainer = Trainer(netout, ce, pe, [sgd_learner(netout.owner.parameters(), lr)]) # Get minibatches of images to train with and perform model training minibatch_size = 32 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 1 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 20 for i in range(0, int(num_minibatches_to_train)): mb = mb_source.get_next_minibatch(minibatch_size) # Specify the mapping of input variables in the model to actual minibatch data to be trained with arguments = { input: mb[features_si].m_data, label: mb[labels_si].m_data } trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq)
def train_lm(training_file): # load the data and vocab data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab( training_file) # Model the source and target inputs to the model input_sequence, label_sequence = create_inputs(vocab_dim) # create the model model = create_model(vocab_dim) # and apply it to the input sequence z = model(input_sequence) # setup the criterions (loss and metric) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # Instantiate the trainer object to drive the model training lr_per_sample = learning_rate_schedule(0.001, UnitType.sample) momentum_time_constant = momentum_as_time_constant_schedule(1100) clipping_threshold_per_sample = 5.0 gradient_clipping_with_truncation = True learner = momentum_sgd( z.parameters, lr_per_sample, momentum_time_constant, gradient_clipping_threshold_per_sample=clipping_threshold_per_sample, gradient_clipping_with_truncation=gradient_clipping_with_truncation) trainer = Trainer(z, ce, errs, learner) sample_freq = 1000 epochs = 50 minibatches_per_epoch = int((data_size / minibatch_size)) minibatches = epochs * minibatches_per_epoch # print out some useful training information log_number_of_parameters(z) print() progress_printer = ProgressPrinter(freq=100, tag='Training') e = 0 p = 0 for i in range(0, minibatches): if p + minibatch_size + 1 >= data_size: p = 0 e += 1 model_filename = "models/shakespeare_epoch%d.dnn" % e z.save_model(model_filename) print("Saved model to '%s'" % model_filename) # get the data features, labels = get_data(p, minibatch_size, data, char_to_ix, vocab_dim) # Specify the mapping of input variables in the model to actual minibatch data to be trained with # If it's the start of the data, we specify that we are looking at a new sequence (True) mask = [False] if p == 0: mask = [True] arguments = ({input_sequence: features, label_sequence: labels}, mask) trainer.train_minibatch(arguments) progress_printer.update_with_trainer(trainer, with_metric=True) # log progress if i % sample_freq == 0: print(sample(z, ix_to_char, vocab_dim, char_to_ix)) p += minibatch_size
def sequence_to_sequence_translator(debug_output=False, run_test=False): input_vocab_dim = 69 label_vocab_dim = 69 # network complexity; initially low for faster testing hidden_dim = 256 num_layers = 1 # Source and target inputs to the model batch_axis = Axis.default_batch_axis() input_seq_axis = Axis('inputAxis') label_seq_axis = Axis('labelAxis') input_dynamic_axes = [batch_axis, input_seq_axis] raw_input = input_variable( shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input') label_dynamic_axes = [batch_axis, label_seq_axis] raw_labels = input_variable( shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes, name='raw_labels') # Instantiate the sequence to sequence translation model input_sequence = raw_input # Drop the sentence start token from the label, for decoder training label_sequence = sequence.slice(raw_labels, 1, 0) # <s> A B C </s> --> A B C </s> label_sentence_start = sequence.first(raw_labels) # <s> is_first_label = sequence.is_first(label_sequence) # <s> 0 0 0 ... label_sentence_start_scattered = sequence.scatter( label_sentence_start, is_first_label) # Encoder encoder_outputH = stabilize(input_sequence) for i in range(0, num_layers): (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value) thought_vectorH = sequence.first(encoder_outputH) thought_vectorC = sequence.first(encoder_outputC) thought_vector_broadcastH = sequence.broadcast_as( thought_vectorH, label_sequence) thought_vector_broadcastC = sequence.broadcast_as( thought_vectorC, label_sequence) # Decoder decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value( decoder_history_hook)) decoder_outputH = stabilize(decoder_input) for i in range(0, num_layers): if (i > 0): recurrence_hookH = past_value recurrence_hookC = past_value else: isFirst = sequence.is_first(label_sequence) recurrence_hookH = lambda operand: element_select( isFirst, thought_vector_broadcastH, past_value(operand)) recurrence_hookC = lambda operand: element_select( isFirst, thought_vector_broadcastC, past_value(operand)) (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC) decoder_output = decoder_outputH # Softmax output layer z = linear_layer(stabilize(decoder_output), label_vocab_dim) # Criterion nodes ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # network output for decoder history net_output = hardmax(z) # make a clone of the graph where the ground truth is replaced by the network output ng = z.clone(CloneMethod.share, {decoder_history_hook.output : net_output.output}) # Instantiate the trainer object to drive the model training lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch) momentum_time_constant = momentum_as_time_constant_schedule(1100) clipping_threshold_per_sample = 2.3 gradient_clipping_with_truncation = True learner = momentum_sgd(z.parameters, lr_per_minibatch, momentum_time_constant, gradient_clipping_threshold_per_sample=clipping_threshold_per_sample, gradient_clipping_with_truncation=gradient_clipping_with_truncation) trainer = Trainer(z, ce, errs, learner) # setup data train_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "Data", "cmudict-0.7b.train-dev-20-21.ctf") valid_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "Data", "tiny.ctf") # readers randomize_data = True if run_test: randomize_data = False # because we want to get an exact error train_reader = create_reader(train_path, randomize_data, input_vocab_dim, label_vocab_dim) train_bind = { raw_input : train_reader.streams.features, raw_labels : train_reader.streams.labels } # get the vocab for printing output sequences in plaintext vocab_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "Data", "cmudict-0.7b.mapping") vocab = [w.strip() for w in open(vocab_path).readlines()] i2w = { i:ch for i,ch in enumerate(vocab) } # Get minibatches of sequences to train with and perform model training i = 0 mbs = 0 minibatch_size = 72 epoch_size = 908241 max_epochs = 10 training_progress_output_freq = 500 # make things more basic for running a quicker test if run_test: epoch_size = 5000 max_epochs = 1 training_progress_output_freq = 30 valid_reader = create_reader(valid_path, False, input_vocab_dim, label_vocab_dim) valid_bind = { find_arg_by_name('raw_input',ng) : valid_reader.streams.features, find_arg_by_name('raw_labels',ng) : valid_reader.streams.labels } for epoch in range(max_epochs): loss_numer = 0 metric_numer = 0 denom = 0 while i < (epoch+1) * epoch_size: # get next minibatch of training data mb_train = train_reader.next_minibatch(minibatch_size, input_map=train_bind) trainer.train_minibatch(mb_train) # collect epoch-wide stats samples = trainer.previous_minibatch_sample_count loss_numer += trainer.previous_minibatch_loss_average * samples metric_numer += trainer.previous_minibatch_evaluation_average * samples denom += samples # every N MBs evaluate on a test sequence to visually show how we're doing if mbs % training_progress_output_freq == 0: mb_valid = valid_reader.next_minibatch(minibatch_size, input_map=valid_bind) e = ng.eval(mb_valid) print_sequences(e, i2w) print_training_progress(trainer, mbs, training_progress_output_freq) i += mb_train[raw_labels].num_samples mbs += 1 print("--- EPOCH %d DONE: loss = %f, errs = %f ---" % (epoch, loss_numer/denom, 100.0*(metric_numer/denom))) error1 = translator_test_error(z, trainer, input_vocab_dim, label_vocab_dim) z.save_model("seq2seq.dnn") z.restore_model("seq2seq.dnn") label_seq_axis = Axis('labelAxis') label_sequence = sequence.slice(find_arg_by_name('raw_labels',z), 1, 0) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) trainer = Trainer(z, ce, errs, [momentum_sgd( z.parameters, lr_per_minibatch, momentum_time_constant, clipping_threshold_per_sample, gradient_clipping_with_truncation)]) error2 = translator_test_error(z, trainer, input_vocab_dim, label_vocab_dim) assert error1 == error2 return error1
def convnet_mnist(debug_output=False): image_height = 28 image_width = 28 num_channels = 1 input_dim = image_height * image_width * num_channels num_output_classes = 10 # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width), np.float32) label_var = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input_var) with default_options (activation=relu, pad=False): conv1 = Convolution((5,5), 32, pad=True)(scaled_input) pool1 = MaxPooling((3,3), (2,2))(conv1) conv2 = Convolution((3,3), 48)(pool1) pool2 = MaxPooling((3,3), (2,2))(conv2) conv3 = Convolution((3,3), 64)(pool2) f4 = Dense(96)(conv3) drop4 = Dropout(0.5)(f4) z = Dense(num_output_classes, activation=None)(drop4) ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) reader_train = create_reader(os.path.join(data_path, 'Train-28x28_cntk_text.txt'), True, input_dim, num_output_classes) # training config epoch_size = 60000 # for now we manually specify epoch size minibatch_size = 128 # Set learning parameters lr_per_sample = [0.001]*10+[0.0005]*10+[0.0001] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size) momentum_time_constant = [0]*5+[1024] # Instantiate the trainer object to drive the model training learner = momentum_sgd(z.parameters, lr_schedule, momentum_time_constant) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var : reader_train.streams.features, label_var : reader_train.streams.labels } log_number_of_parameters(z) ; print() progress_printer = ProgressPrinter(tag='Training') # Get minibatches of images to train with and perform model training max_epochs = 40 for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += data[label_var].num_samples # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) persist.save_model(z, os.path.join(model_path, "ConvNet_MNIST_{}.dnn".format(epoch))) # Load test data reader_test = create_reader(os.path.join(data_path, 'Test-28x28_cntk_text.txt'), False, input_dim, num_output_classes) input_map = { input_var : reader_test.streams.features, label_var : reader_test.streams.labels } # Test data for trained model epoch_size = 10000 minibatch_size = 1024 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)) print("") return metric_numer/metric_denom
def train_sequence_to_sequence_translator(): input_vocab_dim = 69 label_vocab_dim = 69 hidden_dim = 512 num_layers = 2 # Source and target inputs to the model input_dynamic_axes = [ Axis('inputAxis'), Axis.default_batch_axis() ] raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes = input_dynamic_axes) label_dynamic_axes = [ Axis('labelAxis'), Axis.default_batch_axis() ] raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes = label_dynamic_axes) # Instantiate the sequence to sequence translation model input_sequence = raw_input # Drop the sentence start token from the label, for decoder training label_sequence = slice(raw_labels, label_dynamic_axes[0], 1, 0) label_sentence_start = sequence.first(raw_labels) is_first_label = sequence.is_first(label_sequence) label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label) # Encoder encoder_outputH = stabilize(input_sequence) for i in range(0, num_layers): (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(encoder_outputH, hidden_dim, hidden_dim, future_value, future_value) thought_vectorH = sequence.first(encoder_outputH) thought_vectorC = sequence.first(encoder_outputC) thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence) thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence) # Decoder decoder_history_from_ground_truth = label_sequence decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(decoder_history_from_ground_truth)) decoder_outputH = stabilize(decoder_input) for i in range(0, num_layers): if (i == 0): recurrence_hookH = past_value recurrence_hookC = past_value else: isFirst = sequence.is_first(label_sequence) recurrence_hookH = lambda operand: element_select(isFirst, thought_vector_broadcastH, past_value(operand)) recurrence_hookC = lambda operand: element_select(isFirst, thought_vector_broadcastC, past_value(operand)) (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(decoder_outputH, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC) decoder_output = decoder_outputH decoder_dim = hidden_dim # Softmax output layer z = linear_layer(stabilize(decoder_output), label_vocab_dim) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration( feature_stream_name, input_vocab_dim, True, 'S0' ), StreamConfiguration( labels_stream_name, label_vocab_dim, True, 'S1') ], 10000) features_si = mb_source.stream_info(feature_stream_name) labels_si = mb_source.stream_info(labels_stream_name) # Instantiate the trainer object to drive the model training lr = learning_rates_per_sample(0.007) momentum_time_constant = 1100 momentum_per_sample = momentums_per_sample(math.exp(-1.0 / momentum_time_constant)) clipping_threshold_per_sample = 2.3 gradient_clipping_with_truncation = True trainer = Trainer(z, ce, errs, [momentum_sgd_learner(z.owner.parameters(), lr, momentum_per_sample, clipping_threshold_per_sample, gradient_clipping_with_truncation)]) # Get minibatches of sequences to train with and perform model training minibatch_size = 72 training_progress_output_freq = 10 while True: mb = mb_source.get_next_minibatch(minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual minibatch data to be trained with arguments = {raw_input : mb[features_si].m_data, raw_labels : mb[labels_si].m_data} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) i += 1
def sequence_to_sequence_translator(debug_output=False): input_vocab_dim = 69 label_vocab_dim = 69 hidden_dim = 512 num_layers = 2 # Source and target inputs to the model batch_axis = Axis.default_batch_axis() input_seq_axis = Axis("inputAxis") label_seq_axis = Axis("labelAxis") input_dynamic_axes = [batch_axis, input_seq_axis] raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes) label_dynamic_axes = [batch_axis, label_seq_axis] raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes) # Instantiate the sequence to sequence translation model input_sequence = raw_input # Drop the sentence start token from the label, for decoder training label_sequence = slice(raw_labels, label_seq_axis, 1, 0) label_sentence_start = sequence.first(raw_labels) is_first_label = sequence.is_first(label_sequence) label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label) # Encoder encoder_outputH = stabilize(input_sequence) for i in range(0, num_layers): (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( encoder_outputH.output(), hidden_dim, hidden_dim, future_value, future_value ) thought_vectorH = sequence.first(encoder_outputH) thought_vectorC = sequence.first(encoder_outputC) thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence) thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence) # Decoder decoder_history_from_ground_truth = label_sequence decoder_input = element_select( is_first_label, label_sentence_start_scattered, past_value(decoder_history_from_ground_truth) ) decoder_outputH = stabilize(decoder_input) for i in range(0, num_layers): if i > 0: recurrence_hookH = past_value recurrence_hookC = past_value else: isFirst = sequence.is_first(label_sequence) recurrence_hookH = lambda operand: element_select(isFirst, thought_vector_broadcastH, past_value(operand)) recurrence_hookC = lambda operand: element_select(isFirst, thought_vector_broadcastC, past_value(operand)) (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization( decoder_outputH.output(), hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC ) decoder_output = decoder_outputH decoder_dim = hidden_dim # Softmax output layer z = linear_layer(stabilize(decoder_output), label_vocab_dim) ce = cross_entropy_with_softmax(z, label_sequence) errs = classification_error(z, label_sequence) # Instantiate the trainer object to drive the model training lr = 0.007 momentum_time_constant = 1100 momentum_per_sample = momentums_per_sample(math.exp(-1.0 / momentum_time_constant)) clipping_threshold_per_sample = 2.3 gradient_clipping_with_truncation = True trainer = Trainer( z, ce, errs, [ momentum_sgd( z.parameters(), lr, momentum_per_sample, clipping_threshold_per_sample, gradient_clipping_with_truncation, ) ], ) rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) feature_stream_name = "features" labels_stream_name = "labels" mb_source = text_format_minibatch_source( path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, "S0"), StreamConfiguration(labels_stream_name, label_vocab_dim, True, "S1"), ], 10000, ) features_si = mb_source[feature_stream_name] labels_si = mb_source[labels_stream_name] # Get minibatches of sequences to train with and perform model training minibatch_size = 72 training_progress_output_freq = 30 if debug_output: training_progress_output_freq = training_progress_output_freq / 3 while True: mb = mb_source.get_next_minibatch(minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = {raw_input: mb[features_si], raw_labels: mb[labels_si]} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) i += 1 rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.test.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) test_mb_source = text_format_minibatch_source( path, [ StreamConfiguration(feature_stream_name, input_vocab_dim, True, "S0"), StreamConfiguration(labels_stream_name, label_vocab_dim, True, "S1"), ], 10000, False, ) features_si = test_mb_source[feature_stream_name] labels_si = test_mb_source[labels_stream_name] # choose this to be big enough for the longest sentence train_minibatch_size = 1024 # Get minibatches of sequences to test and perform testing i = 0 total_error = 0.0 while True: mb = test_mb_source.get_next_minibatch(train_minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual # minibatch data to be tested with arguments = {raw_input: mb[features_si], raw_labels: mb[labels_si]} mb_error = trainer.test_minibatch(arguments) total_error += mb_error if debug_output: print("Minibatch {}, Error {} ".format(i, mb_error)) i += 1 # Average of evaluation errors of all test minibatches return total_error / i
def train_fast_rcnn(debug_output=False): if debug_output: print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output")) # Create the minibatch source minibatch_source = create_mb_source(image_height, image_width, num_channels, num_classes, num_rois, base_path, "train") # Input variables denoting features, rois and label data image_input = input((num_channels, image_height, image_width)) roi_input = input((num_rois, 4)) label_input = input((num_rois, num_classes)) # define mapping from reader streams to network inputs input_map = { image_input: minibatch_source[features_stream_name], roi_input: minibatch_source[roi_stream_name], label_input: minibatch_source[label_stream_name] } # Instantiate the Fast R-CNN prediction model and loss function frcn_output = frcn_predictor(image_input, roi_input, num_classes) ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1) pe = classification_error(frcn_output, label_input, axis=1) if debug_output: plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png")) # Set learning parameters l2_reg_weight = 0.0005 lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001] lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample) mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant) # Instantiate the trainer object learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight) progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs) trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer) # Get minibatches of images and perform model training print("Training Fast R-CNN model for %s epochs." % max_epochs) log_number_of_parameters(frcn_output) for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = minibatch_source.next_minibatch(min( mb_size, epoch_size - sample_count), input_map=input_map) trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far trainer.summarize_training_progress() if debug_output: frcn_output.save( os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch + 1))) return frcn_output
def conv3d_ucf11(train_reader, test_reader, max_epochs=30): # Replace 0 with 1 to get detailed log. set_computation_network_trace_level(0) # These values must match for both train and test reader. image_height = train_reader.height image_width = train_reader.width num_channels = train_reader.channel_count sequence_length = train_reader.sequence_length num_output_classes = train_reader.label_count # Input variables denoting the features and label data input_var = input_variable((num_channels, sequence_length, image_height, image_width), np.float32) label_var = input_variable(num_output_classes, np.float32) # Instantiate simple 3D Convolution network inspired by VGG network # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf with default_options (activation=relu): z = Sequential([ Convolution3D((3,3,3), 64, pad=True), MaxPooling((1,2,2), (1,2,2)), For(range(3), lambda i: [ Convolution3D((3,3,3), [96, 128, 128][i], pad=True), Convolution3D((3,3,3), [96, 128, 128][i], pad=True), MaxPooling((2,2,2), (2,2,2)) ]), For(range(2), lambda : [ Dense(1024), Dropout(0.5) ]), Dense(num_output_classes, activation=None) ])(input_var) # loss and classification error. ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # training config epoch_size = 1322 # for now we manually specify epoch size minibatch_size = 4 # Set learning parameters lr_per_sample = [0.01]*10+[0.001]*10+[0.0001] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample) momentum_time_constant = 4096 mm_schedule = momentum_as_time_constant_schedule([momentum_time_constant], epoch_size=epoch_size) # Instantiate the trainer object to drive the model training learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, True) progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs) trainer = Trainer(z, (ce, pe), learner, progress_printer) log_number_of_parameters(z) ; print() # Get minibatches of images to train with and perform model training for epoch in range(max_epochs): # loop over epochs train_reader.reset() while train_reader.has_more(): videos, labels, current_minibatch = train_reader.next_minibatch(minibatch_size) trainer.train_minibatch({input_var : videos, label_var : labels}) trainer.summarize_training_progress() # Test data for trained model epoch_size = 332 minibatch_size = 2 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 minibatch_index = 0 test_reader.reset() while test_reader.has_more(): videos, labels, current_minibatch = test_reader.next_minibatch(minibatch_size) # minibatch data to be trained with metric_numer += trainer.test_minibatch({input_var : videos, label_var : labels}) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)) print("") return metric_numer/metric_denom
def train_sequence_classifier(debug_output=False): input_dim = 2000 cell_dim = 25 hidden_dim = 25 embedding_dim = 50 num_output_classes = 5 # Input variables denoting the features and label data features = input_variable(shape=input_dim, is_sparse=True) label = input_variable(num_output_classes, dynamic_axes=[ Axis.default_batch_axis()]) # Instantiate the sequence classification model classifier_output = LSTM_sequence_classifer_net( features, num_output_classes, embedding_dim, hidden_dim, cell_dim) ce = cross_entropy_with_softmax(classifier_output, label) pe = classification_error(classifier_output, label) rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_dim, True, 'x'), StreamConfiguration(labels_stream_name, num_output_classes, False, 'y')], 0) features_si = mb_source[features] labels_si = mb_source[label] # Instantiate the trainer object to drive the model training trainer = Trainer(classifier_output, ce, pe, [sgd(classifier_output.parameters(), lr=0.0005)]) # Get minibatches of sequences to train with and perform model training minibatch_size = 200 training_progress_output_freq = 10 i = 0 if debug_output: training_progress_output_freq = training_progress_output_freq/3 while True: mb = mb_source.get_next_minibatch(minibatch_size) if len(mb) == 0: break # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = {features: mb[features_si], label: mb[labels_si]} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) i += 1 import copy evaluation_average = copy.copy( trainer.previous_minibatch_evaluation_average()) loss_average = copy.copy(trainer.previous_minibatch_loss_average()) return evaluation_average, loss_average
def cifar_resnet_distributed(data_path, run_test, num_epochs, communicator=None, save_model_filename=None, load_model_filename=None, debug_output=False): image_height = 32 image_width = 32 num_channels = 3 num_classes = 10 feats_stream_name = 'features' labels_stream_name = 'labels' minibatch_source = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True, distributed_communicator = communicator) features_si = minibatch_source[feats_stream_name] labels_si = minibatch_source[labels_stream_name] # Instantiate the resnet classification model, or load from file if load_model_filename: print("Loading model:", load_model_filename) classifier_output = persist.load_model(load_model_filename) image_input = classifier_output.arguments[0] else: image_input = input_variable( (num_channels, image_height, image_width), features_si.m_element_type) classifier_output = create_resnet_model(image_input, num_classes) # Input variables denoting the features and label data label_var = input_variable((num_classes), features_si.m_element_type) ce = cross_entropy_with_softmax(classifier_output, label_var) pe = classification_error(classifier_output, label_var) # Instantiate the trainer object to drive the model training mb_size = 128 num_mb_per_epoch = 100 num_mbs = num_mb_per_epoch * num_epochs lr_per_sample = [1/mb_size]*80+[0.1/mb_size]*40+[0.01/mb_size] lr_schedule = learning_rate_schedule(lr_per_sample, units = mb_size * num_mb_per_epoch) momentum_time_constant = -mb_size/np.log(0.9) # create data parallel distributed trainer if needed dist_trainer = distributed.data_parallel_distributed_trainer(communicator, False) if communicator else None # Instantiate the trainer object to drive the model training trainer = Trainer(classifier_output, ce, pe, [momentum_sgd(classifier_output.parameters, lr_schedule, momentum_time_constant, l2_regularization_weight=0.0001)], distributed_trainer = dist_trainer) # Get minibatches of images to train with and perform model training training_progress_output_freq = 100 if communicator else 20 if debug_output: training_progress_output_freq = training_progress_output_freq/4 for i in range(0, num_mbs): # NOTE: depends on network, the mb_size can be changed dynamically here mb = minibatch_source.next_minibatch(mb_size) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = { image_input: mb[features_si], label_var: mb[labels_si] } trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) if save_model_filename: print("Saving model:", save_model_filename) persist.save_model(classifier_output, save_model_filename) if run_test: test_minibatch_source = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False) features_si = test_minibatch_source[feats_stream_name] labels_si = test_minibatch_source[labels_stream_name] mb_size = 128 num_mbs = 100 total_error = 0.0 for i in range(0, num_mbs): mb = test_minibatch_source.next_minibatch(mb_size) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = { image_input: mb[features_si], label_var: mb[labels_si] } error = trainer.test_minibatch(arguments) total_error += error return total_error / num_mbs else: return 0
def simple_mnist(debug_output=False): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input) netout = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) try: rel_path = os.path.join( os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join( * "../../../../Examples/Image/Datasets/MNIST/Train-28x28_cntk_text.txt" .split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { input: reader_train.streams.features, label: reader_train.streams.labels } # Instantiate the trainer object to drive the model training trainer = Trainer(netout, ce, pe, sgd(netout.parameters, lr=0.003125)) # Get minibatches of images to train with and perform model training minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 500 if debug_output: training_progress_output_freq = training_progress_output_freq / 4 for i in range(0, int(num_minibatches_to_train)): mb = reader_train.next_minibatch(minibatch_size, input_map=input_map) trainer.train_minibatch(mb) print_training_progress(trainer, i, training_progress_output_freq) # Load test data try: rel_path = os.path.join( os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join( * "../../../../Examples/Image/Datasets/MNIST/Test-28x28_cntk_text.txt" .split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { input: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def convnet_cifar10_dataaug(reader_train, reader_test, max_epochs = 80): set_computation_network_trace_level(0) # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # apply model to input scaled_input = element_times(constant(0.00390625), input_var) with default_options (activation=relu, pad=True): z = Sequential([ LayerStack(2, lambda : [ Convolution((3,3), 64), Convolution((3,3), 64), MaxPooling((3,3), (2,2)) ]), LayerStack(2, lambda i: [ Dense([256,128][i]), Dropout(0.5) ]), Dense(num_classes, activation=None) ])(scaled_input) # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # training config epoch_size = 50000 # for now we manually specify epoch size minibatch_size = 64 # Set learning parameters lr_per_sample = [0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625] lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size) mm_time_constant = [0]*20+[600]*20+[1200] mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size) l2_reg_weight = 0.002 # trainer object learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight = l2_reg_weight) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) ; print() progress_printer = ProgressPrinter(tag='Training') # perform model training for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) persist.save_model(z, os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch))) ### Evaluation action epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)) print("") return metric_numer/metric_denom
def cifar_resnet(base_path, debug_output=False): image_height = 32 image_width = 32 num_channels = 3 num_classes = 10 feats_stream_name = 'features' labels_stream_name = 'labels' minibatch_source = create_mb_source(feats_stream_name, labels_stream_name, image_height, image_width, num_channels, num_classes, base_path) features_si = minibatch_source.stream_info(feats_stream_name) labels_si = minibatch_source.stream_info(labels_stream_name) # Input variables denoting the features and label data image_input = input_variable((num_channels, image_height, image_width), features_si.m_element_type) label_var = input_variable((num_classes), features_si.m_element_type) # Instantiate the resnet classification model classifier_output = resnet_classifer(image_input, num_classes) ce = cross_entropy_with_softmax(classifier_output, label_var) pe = classification_error(classifier_output, label_var) # Instantiate the trainer object to drive the model training trainer = Trainer(classifier_output, ce, pe, [sgd(classifier_output.parameters(), lr=0.0078125)]) # Get minibatches of images to train with and perform model training mb_size = 32 training_progress_output_freq = 20 num_mbs = 1000 for i in range(0, num_mbs): mb = minibatch_source.get_next_minibatch(mb_size) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = { image_input: mb[features_si].m_data, label_var: mb[labels_si].m_data } trainer.train_minibatch(arguments) if debug_output: print_training_progress(trainer, i, training_progress_output_freq) test_minibatch_source = create_test_mb_source(feats_stream_name, labels_stream_name, image_height, image_width, num_channels, num_classes, base_path) features_si = test_minibatch_source.stream_info(feats_stream_name) labels_si = test_minibatch_source.stream_info(labels_stream_name) mb_size = 64 num_mbs = 300 total_error = 0.0 for i in range(0, num_mbs): mb = test_minibatch_source.get_next_minibatch(mb_size) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = { image_input: mb[features_si].m_data, label_var: mb[labels_si].m_data } error = trainer.test_minibatch(arguments) total_error += error return total_error / num_mbs
def simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data features = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), features) netout = fully_connected_classifier_net( scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) try: rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join(*"../Image/DataSets/MNIST/Train-28x28_cntk_text.txt".split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { features: reader_train.streams.features, label: reader_train.streams.labels } # Instantiate progress writers. logdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mnist_log") tensorboard_writer = TensorBoardProgressWriter(freq=1, log_dir=logdir, model=netout) progress_printer = ProgressPrinter(freq=10, tag='Training') # Instantiate the trainer object to drive the model training lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch) learner = sgd(netout.parameters, lr=lr_per_minibatch) trainer = Trainer(netout, (ce, pe), learner, [tensorboard_writer, progress_printer]) # Get minibatches of images to train with and perform model training minibatch_size = 64 num_samples_per_sweep = 6000 num_sweeps_to_train_with = 2 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size for minibatch_idx in range(0, int(num_minibatches_to_train)): trainer.train_minibatch(reader_train.next_minibatch(minibatch_size, input_map=input_map)) # Log max/min/mean of each parameter tensor, so that we can confirm that the parameters change indeed. # Don't want to do that very often though, otherwise will spend too much time computing min/max/mean. if minibatch_idx % 10 == 9: for p in netout.parameters: tensorboard_writer.write_value(p.uid + "/max", reduce_max(p).eval(), minibatch_idx) tensorboard_writer.write_value(p.uid + "/min", reduce_min(p).eval(), minibatch_idx) tensorboard_writer.write_value(p.uid + "/mean", reduce_mean(p).eval(), minibatch_idx) trainer.summarize_training_progress() # Load test data try: rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join(*"../Image/DataSets/MNIST/Test-28x28_cntk_text.txt".split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { features: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) test_result += trainer.test_minibatch(mb) # Average of evaluation errors of all test minibatches trainer.summarize_test_progress() return test_result / num_minibatches_to_test
def train_and_evaluate(reader_train, reader_test, network_name, max_epochs, distributed_trainer): set_computation_network_trace_level(0) # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # create model, and configure learning parameters if network_name == 'resnet20': z = create_cifar10_model(input_var, 3, num_classes) lr_per_mb = [1.0]*80+[0.1]*40+[0.01] elif network_name == 'resnet110': z = create_cifar10_model(input_var, 18, num_classes) lr_per_mb = [0.1]*1+[1.0]*80+[0.1]*40+[0.01] else: return RuntimeError("Unknown model name!") # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # shared training parameters epoch_size = 50000 # for now we manually specify epoch size minibatch_size = 128 momentum_time_constant = -minibatch_size/np.log(0.9) l2_reg_weight = 0.0001 # Set learning parameters lr_per_sample = [lr/minibatch_size for lr in lr_per_mb] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample) mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant) # trainer object learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight = l2_reg_weight) trainer = Trainer(z, ce, pe, learner, distributed_trainer) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) ; print() progress_printer = ProgressPrinter(tag='Training') # perform model training for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) # save model only in worker0, otherwise there will be file write conflicts for multi GPU on the same machine if distributed_trainer.communicator().current_worker().global_rank == 0: persist.save_model(z, os.path.join(model_path, network_name + "_{}.dnn".format(epoch))) # Evaluation parameters epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)) print("") return metric_numer/metric_denom
def conv3d_ucf11(train_reader, test_reader, max_epochs=30): # Replace 0 with 1 to get detailed log. set_computation_network_trace_level(0) # These values must match for both train and test reader. image_height = train_reader.height image_width = train_reader.width num_channels = train_reader.channel_count sequence_length = train_reader.sequence_length num_output_classes = train_reader.label_count # Input variables denoting the features and label data input_var = input_variable( (num_channels, sequence_length, image_height, image_width), np.float32) label_var = input_variable(num_output_classes, np.float32) # Instantiate simple 3D Convolution network inspired by VGG network # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf with default_options(activation=relu): z = Sequential([ Convolution((3, 3, 3), 64, pad=True), MaxPooling((1, 2, 2), (1, 2, 2)), LayerStack( 3, lambda i: [ Convolution((3, 3, 3), [96, 128, 128][i], pad=True), Convolution((3, 3, 3), [96, 128, 128][i], pad=True), MaxPooling((2, 2, 2), (2, 2, 2)) ]), LayerStack(2, lambda: [Dense(1024), Dropout(0.5)]), Dense(num_output_classes, activation=None) ])(input_var) # loss and classification error. ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # training config epoch_size = 1322 # for now we manually specify epoch size minibatch_size = 4 # Set learning parameters lr_per_sample = [0.01] * 10 + [0.001] * 10 + [0.0001] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample) momentum_time_constant = 4096 mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size) # Instantiate the trainer object to drive the model training learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule) trainer = Trainer(z, ce, pe, learner) log_number_of_parameters(z) print() progress_printer = ProgressPrinter(tag='Training') # Get minibatches of images to train with and perform model training for epoch in range(max_epochs): # loop over epochs train_reader.reset() while train_reader.has_more(): videos, labels, current_minibatch = train_reader.next_minibatch( minibatch_size) trainer.train_minibatch({input_var: videos, label_var: labels}) progress_printer.update_with_trainer( trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) # Test data for trained model epoch_size = 332 minibatch_size = 2 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 minibatch_index = 0 test_reader.reset() while test_reader.has_more(): videos, labels, current_minibatch = test_reader.next_minibatch( minibatch_size) # minibatch data to be trained with metric_numer += trainer.test_minibatch({ input_var: videos, label_var: labels }) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format( minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom)) print("") return metric_numer / metric_denom
def convnet_cifar10_dataaug(reader_train, reader_test): set_computation_network_trace_level(0) # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # apply model to input scaled_input = element_times(constant(0.00390625), input_var) with default_options(activation=relu, pad=True): z = Sequential([ LayerStack( 2, lambda: [ Convolution((3, 3), 64), Convolution((3, 3), 64), MaxPooling((3, 3), (2, 2)) ]), LayerStack(2, lambda i: [Dense([256, 128][i]), Dropout(0.5)]), Dense(num_classes, activation=None) ])(scaled_input) # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # training config epoch_size = 50000 # for now we manually specify epoch size minibatch_size = 64 # Set learning parameters lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [ 0.00015625 ] * 20 + [0.000046875] * 10 + [0.000015625] lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size) momentum_time_constant = [0] * 20 + [600] * 20 + [1200] mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size) l2_reg_weight = 0.002 # trainer object learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) print() progress_printer = ProgressPrinter(tag='Training') # perform model training max_epochs = 80 for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch( min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far progress_printer.update_with_trainer( trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) persist.save_model( z, os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch))) ### Evaluation action epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format( minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom)) print("") return metric_numer / metric_denom
def train_and_evaluate(reader_train, reader_test, max_epochs): # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # Normalize the input feature_scale = 1.0 / 256.0 input_var_norm = element_times(feature_scale, input_var) # apply model to input z = create_vgg9_model(input_var_norm, 10) # # Training action # # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # training config epoch_size = 50000 minibatch_size = 64 # Set learning parameters lr_per_minibatch = learning_rate_schedule([0.01]*10 + [0.003]*10 + [0.001], epoch_size, UnitType.minibatch) momentum_time_constant = momentum_as_time_constant_schedule(-minibatch_size/np.log(0.9)) l2_reg_weight = 0.0001 # trainer object learner = momentum_sgd(z.parameters, lr = lr_per_minibatch, momentum = momentum_time_constant, l2_regularization_weight = l2_reg_weight) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) ; print() progress_printer = ProgressPrinter(tag='Training') # perform model training for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += data[label_var].num_samples # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) # # Evaluation action # epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 #progress_printer = ProgressPrinter(freq=100, first=10, tag='Eval') while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)) print("") # return evaluation error. return metric_numer/metric_denom
def train_and_evaluate(create_train_reader, test_reader, network_name, max_epochs, create_dist_learner, scale_up=False): set_computation_network_trace_level(0) # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # create model, and configure learning parameters if network_name == 'resnet20': z = create_cifar10_model(input_var, 3, num_classes) lr_per_mb = [1.0] * 80 + [0.1] * 40 + [0.01] elif network_name == 'resnet110': z = create_cifar10_model(input_var, 18, num_classes) lr_per_mb = [0.1] * 1 + [1.0] * 80 + [0.1] * 40 + [0.01] else: return RuntimeError("Unknown model name!") # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # shared training parameters epoch_size = 50000 # for now we manually specify epoch size # NOTE: scaling up minibatch_size increases sample throughput. In 8-GPU machine, # ResNet110 samples-per-second is ~7x of single GPU, comparing to ~3x without scaling # up. However, bigger minimatch size on the same number of samples means less updates, # thus leads to higher training error. This is a trade-off of speed and accuracy minibatch_size = 128 * (distributed.Communicator.num_workers() if scale_up else 1) momentum_time_constant = -minibatch_size / np.log(0.9) l2_reg_weight = 0.0001 # Set learning parameters lr_per_sample = [lr / minibatch_size for lr in lr_per_mb] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample) mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant) # trainer object learner = create_dist_learner( momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)) trainer = Trainer(z, ce, pe, learner) total_number_of_samples = max_epochs * epoch_size train_reader = create_train_reader(total_number_of_samples) # define mapping from reader streams to network inputs input_map = { input_var: train_reader.streams.features, label_var: train_reader.streams.labels } log_number_of_parameters(z) print() progress_printer = ProgressPrinter(tag='Training') # perform model training current_epoch = 0 updated = True while updated: data = train_reader.next_minibatch( minibatch_size, input_map=input_map) # fetch minibatch. updated = trainer.train_minibatch(data) # update model with it progress_printer.update_with_trainer(trainer, with_metric=True) # log progress epoch_index = int(trainer.total_number_of_samples_seen / epoch_size) if current_epoch != epoch_index: # new epoch reached progress_printer.epoch_summary(with_metric=True) current_epoch = epoch_index trainer.save_checkpoint( os.path.join(model_path, network_name + "_{}.dnn".format(current_epoch))) # Evaluation parameters epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while True: data = test_reader.next_minibatch(minibatch_size, input_map=input_map) if not data: break local_mb_samples = data[label_var].num_samples metric_numer += trainer.test_minibatch(data) * local_mb_samples metric_denom += local_mb_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format( minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom)) print("") return metric_numer / metric_denom