# Imports assumed by the snippets below (CNTK 2.x layout); data_path,
# abs_path, input_dim, and num_labels are module-level globals defined
# elsewhere in the original files.
import math
import os

import cntk
import cntk as C
from cntk.io import (HTKFeatureDeserializer, HTKMLFDeserializer,
                     MinibatchSource, StreamDef, StreamDefs)
from cntk.layers import Dense, For, LSTM, Recurrence, Sequential


def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(amazing_features=StreamDef(
        shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential([For(range(3), lambda: Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample,
                                                     epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    pp = C.ProgressPrinter(freq=0)

    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
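# --- Added commentary (not part of the original test) ---
# File roles in the HTK reader setup above, as I understand them:
#   glob_0000.scp  - HTK script file mapping utterance IDs to feature files
#   glob_0000.mlf  - HTK master label file with framewise state labels
#   state.list     - label mapping file enumerating the state names, one per
#                    class (matching num_classes = 132)
# With context=(2, 2), every frame is delivered together with two frames of
# left and two frames of right context, so the flattened input dimension is
# (2 * 2 + 1) * 33 = 165, which is exactly what C.input_variable receives.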
def train(reader, model, max_epochs):
    # Input variables denoting the features and label data
    query = cntk.blocks.Input(input_dim, is_sparse=False)
    slot_labels = cntk.blocks.Input(num_labels,
                                    is_sparse=True)  # TODO: make sparse once it works

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cntk.ops.cross_entropy_with_softmax(z, slot_labels)
    pe = cntk.ops.classification_error(z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100
    # TODO: Change to round number. This is 664.39. 700?
    momentum_time_constant = cntk.learner.momentum_as_time_constant_schedule(
        minibatch_size / -math.log(0.9))

    # LR schedule over epochs (we don't run that many epochs, but if we did,
    # these are good values)
    lr_schedule = [0.003] * 2 + [0.0015] * 12 + [0.0003]

    # trainer object
    lr_per_sample = cntk.learner.learning_rate_schedule(
        lr_schedule, cntk.learner.UnitType.sample, epoch_size)
    learner = cntk.learner.adam_sgd(z.parameters,
                                    lr=lr_per_sample,
                                    momentum=momentum_time_constant,
                                    low_memory=True,
                                    gradient_clipping_threshold_per_sample=15,
                                    gradient_clipping_with_truncation=True)
    trainer = cntk.Trainer(z, (ce, pe), [learner])

    # define mapping from reader streams to network inputs
    input_map = {
        query: reader.streams.query,
        slot_labels: reader.streams.slot_labels
    }

    # process minibatches and perform model training
    cntk.utils.log_number_of_parameters(z)
    print()
    progress_printer = cntk.ProgressPrinter(freq=100, first=10, tag='Training')
    # more detailed logging:
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    # loop over epochs
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        # loop over minibatches on the epoch
        while t < epoch_end:
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(min(minibatch_size, epoch_end - t),
                                         input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)                      # update model with it
            t += trainer.previous_minibatch_sample_count       # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        #def trace_node(name):
        #    nl = [n for n in z.parameters if n.name() == name]
        #    if len(nl) > 0:
        #        print(name, np.asarray(nl[0].value))
        #trace_node('W')
        #trace_node('stabilizer_param')
        loss, metric, actual_samples = progress_printer.epoch_summary(
            with_metric=True)

    return loss, metric
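# --- Added note on the momentum schedule above (not original commentary) ---
# momentum_as_time_constant_schedule takes a time constant tau in samples for
# the per-sample exponential moving average m = exp(-1/tau). To keep an
# effective per-minibatch momentum of 0.9 at minibatch_size = 70 samples,
# solve exp(-70 / tau) = 0.9, i.e. tau = 70 / -ln(0.9) ~= 664.39, which is
# the 664.39 mentioned in the TODO comment above.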
def train(reader, model, max_epochs, model_dir=None, tensorboard_logdir=None):
    # Input variables denoting the features and label data
    query = cntk.blocks.Input(input_dim, is_sparse=False)
    slot_labels = cntk.blocks.Input(num_labels,
                                    is_sparse=True)  # TODO: make sparse once it works

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cntk.ops.cross_entropy_with_softmax(z, slot_labels)
    pe = cntk.ops.classification_error(z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100
    # TODO: Change to round number. This is 664.39. 700?
    momentum_time_constant = cntk.learner.momentum_as_time_constant_schedule(
        minibatch_size / -math.log(0.9))

    # LR schedule over epochs (we don't run that many epochs, but if we did,
    # these are good values)
    lr_schedule = [0.003] * 2 + [0.0015] * 12 + [0.0003]
    lr_per_sample = cntk.learner.learning_rate_schedule(
        lr_schedule, cntk.learner.UnitType.sample, epoch_size)
    learner = cntk.learner.adam_sgd(z.parameters,
                                    lr=lr_per_sample,
                                    momentum=momentum_time_constant,
                                    low_memory=True,
                                    gradient_clipping_threshold_per_sample=15,
                                    gradient_clipping_with_truncation=True)

    # Progress writers
    progress_writers = [cntk.ProgressPrinter(freq=100, first=10,
                                             tag='Training',
                                             num_epochs=max_epochs)]
    # more detailed logging:
    #progress_writers = [cntk.ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    if tensorboard_logdir is not None:
        progress_writers.append(cntk.TensorBoardProgressWriter(
            freq=10, log_dir=tensorboard_logdir, model=z))

    # trainer object
    trainer = cntk.Trainer(z, (ce, pe), [learner], progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        query: reader.streams.query,
        slot_labels: reader.streams.slot_labels
    }

    # process minibatches and perform model training
    cntk.utils.log_number_of_parameters(z)
    print()

    t = 0
    aggregate_loss = 0
    aggregate_error = 0
    total_samples = 0
    # loop over epochs
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        aggregate_loss = 0
        aggregate_error = 0
        total_samples = 0
        # loop over minibatches on the epoch
        while t < epoch_end:
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(min(minibatch_size, epoch_end - t),
                                         input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)                      # update model with it
            samples = trainer.previous_minibatch_sample_count
            t += samples
            total_samples += samples
            aggregate_loss += trainer.previous_minibatch_loss_average * samples
            aggregate_error += trainer.previous_minibatch_evaluation_average * samples
        #def trace_node(name):
        #    nl = [n for n in z.parameters if n.name() == name]
        #    if len(nl) > 0:
        #        print(name, np.asarray(nl[0].value))
        #trace_node('W')
        #trace_node('stabilizer_param')
        if model_dir:
            z.save(os.path.join(model_dir, "atis_{}.dnn".format(epoch)))
        trainer.summarize_training_progress()

    return aggregate_loss / total_samples, aggregate_error / total_samples
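# --- Hypothetical usage sketch (added; not part of the original example) ---
# Rough wiring of train() above to a CTF-format ATIS reader and a small
# recurrent tagging model. The file name "atis.train.ctf", the stream fields
# 'S0'/'S2', and the layer sizes are illustrative assumptions, not values
# taken from this file.
def create_reader_sketch(path):
    return MinibatchSource(cntk.io.CTFDeserializer(path, StreamDefs(
        query=StreamDef(field='S0', shape=input_dim, is_sparse=True),
        slot_labels=StreamDef(field='S2', shape=num_labels, is_sparse=True))))

def create_model_sketch():
    return Sequential([cntk.layers.Embedding(150),
                       Recurrence(LSTM(300)),
                       Dense(num_labels)])

#reader = create_reader_sketch("atis.train.ctf")
#loss, metric = train(reader, create_model_sketch(), max_epochs=8,
#                     model_dir=None, tensorboard_logdir=None)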