import os
import copy

# NOTE: these imports reflect the CNTK 2.0 beta API this example was written
# against; the exact module paths are an assumption and may differ between
# beta releases. LSTM_sequence_classifer_net (spelled as in the original
# helper) and print_training_progress are defined elsewhere in the example.
from cntk import Trainer, Axis, StreamConfiguration, text_format_minibatch_source
from cntk.learner import sgd
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error


def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes,
                           dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim, True, 'x'),
        StreamConfiguration(labels_stream_name, num_output_classes, False, 'y')], 0)

    features_si = mb_source[features]
    labels_si = mb_source[label]

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd(classifier_output.parameters(), lr=0.0005)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0

    if debug_output:
        # integer division keeps the frequency usable as a modulo divisor
        training_progress_output_freq = training_progress_output_freq // 3

    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {features: mb[features_si], label: mb[labels_si]}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        i += 1

    evaluation_average = copy.copy(
        trainer.previous_minibatch_evaluation_average())
    loss_average = copy.copy(trainer.previous_minibatch_loss_average())

    return evaluation_average, loss_average
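# A minimal driver for the classifier above, shown as a sketch: the
# "__main__" guard and the printed message are illustrative additions,
# not part of the original example.
if __name__ == '__main__':
    error, loss = train_sequence_classifier()
    print("Error: {:.4f}, loss: {:.4f}".format(error, loss))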
# NOTE: Input, input_dim, num_labels, learning_rates_per_sample, fsadagrad,
# next_minibatch and print_training_progress are defined or imported
# elsewhere in the original example script.
def train(reader, model, max_epochs):
    # Input variables denoting the features and label data
    query = Input(input_dim, is_sparse=False)  # TODO: make sparse once it works
    slot_labels = Input(num_labels, is_sparse=True)

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cross_entropy_with_softmax(z, slot_labels)
    pe = classification_error(z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100

    lr_per_sample = [0.003]*2 + [0.0015]*12 + [0.0003]
    momentum = 0.9**(1/minibatch_size)  # TODO: change to time constant

    # trainer object
    lr_schedule = learning_rates_per_sample(lr_per_sample, units=epoch_size)
    learner = fsadagrad(z.parameters(), lr_schedule, momentum,
                        targetAdagradAvDenom=1,
                        clipping_threshold_per_sample=15,
                        gradient_clipping_with_truncation=True)
    trainer = Trainer(z, ce, pe, [learner])
    #_extend_Trainer(trainer)  # TODO: should be just baked in

    # define mapping from reader streams to network inputs
    input_map = {
        query: reader.streams.query,
        slot_labels: reader.streams.slot_labels
    }

    # process minibatches and perform model training
    t = 0
    mbs = 0
    for epoch in range(max_epochs):
        loss_numer = 0  # TODO: find a nicer way of tracking, this is clumsy
        loss_denom = 0
        metric_numer = 0
        metric_denom = 0
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:
            # BUGBUG: RuntimeError: GetNextMinibatch: Changing minibatch sizes across calls is currently unsupported
            #data, num_samples = next_minibatch(reader, min(minibatch_size, epoch_size-t), input_map)
            data, num_samples = next_minibatch(reader, minibatch_size, input_map)
            if data is None:
                break
            trainer.train_minibatch(data)
            # accumulate sample-weighted loss and metric
            # (too much code for something this simple)
            loss_numer += trainer.previous_minibatch_loss_average() * trainer.previous_minibatch_sample_count()
            loss_denom += trainer.previous_minibatch_sample_count()
            metric_numer += trainer.previous_minibatch_evaluation_average() * trainer.previous_minibatch_sample_count()
            metric_denom += trainer.previous_minibatch_sample_count()
            print_training_progress(trainer, mbs if mbs > 10 else 0, num_mbs_to_show_result)
            t += num_samples[slot_labels]
            #print(num_samples[slot_labels], t)
            mbs += 1

        print("--- EPOCH {} DONE: loss = {:0.6f} * {}, metric = {:0.1f}% * {} ---".format(
            epoch+1, loss_numer/loss_denom, loss_denom,
            metric_numer/metric_denom*100.0, metric_denom))

    return loss_numer/loss_denom, metric_numer/metric_denom
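# Hypothetical call site for train(), illustrating the expected shape of its
# arguments; create_reader and create_model are assumed helper names (the
# original script builds the reader and model alongside input_dim and
# num_labels), so the lines below are left commented out as a sketch.
#
# reader = create_reader(train_data_path)  # must expose .streams.query and .streams.slot_labels
# model = create_model()                   # a function mapping the query input to slot-label logits
# avg_loss, avg_metric = train(reader, model, max_epochs=8)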