def print_training_progress(trainer, mb, frequency):
    if mb % frequency == 0:
        training_loss = get_train_loss(trainer)
        eval_crit = get_train_eval_criterion(trainer)
        print("Minibatch: {}, Train Loss: {}, Train Evaluation Criterion: {}".format(
            mb, training_loss, eval_crit))
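# The progress helpers in these snippets call get_train_loss and
# get_train_eval_criterion without defining them. A minimal sketch, assuming
# CNTK's Trainer attributes previous_minibatch_loss_average and
# previous_minibatch_evaluation_average (copy.copy guards against the trainer
# mutating the returned value on the next minibatch):
import copy

def get_train_loss(trainer):
    # average training loss over the last minibatch
    return copy.copy(trainer.previous_minibatch_loss_average)

def get_train_eval_criterion(trainer):
    # average evaluation criterion (error rate) over the last minibatch
    return copy.copy(trainer.previous_minibatch_evaluation_average)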
def print_training_progress(trainer, mb, frequency):
    training_loss = "NA"
    eval_error = "NA"
    if mb % frequency == 0:
        training_loss = get_train_loss(trainer)
        eval_error = get_train_eval_criterion(trainer)
    return mb, training_loss, eval_error
def _test_cifar_resnet():
    dev = 0
    cntk_dev = cntk_device(dev)
    epoch_size = sys.maxsize
    mbs = create_mb_source(epoch_size)
    stream_infos = mbs.stream_infos()
    for si in stream_infos:
        if si.m_name == 'features':
            features_si = si
        elif si.m_name == 'labels':
            labels_si = si

    # reorder the (H, W, C) layout delivered by the reader into CNTK's (C, H, W)
    image_shape = features_si.m_sample_layout.dimensions()
    image_shape = (image_shape[2], image_shape[0], image_shape[1])
    num_classes = labels_si.m_sample_layout.dimensions()[0]

    image_input = variable(image_shape, features_si.m_element_type,
                           needs_gradient=False, name="Images")
    classifier_output = resnet_classifer(image_input, num_classes, dev,
                                         "classifierOutput")
    # the label variable takes its element type from the label stream
    label_var = variable((num_classes,), labels_si.m_element_type,
                         needs_gradient=False, name="Labels")

    ce = cross_entropy_with_softmax(classifier_output.output(), label_var)
    pe = classification_error(classifier_output.output(), label_var)
    image_classifier = combine([ce, pe, classifier_output], "ImageClassifier")

    learning_rate_per_sample = cntk_py.learning_rates_per_sample(0.0078125)
    trainer = cntk_py.Trainer(image_classifier, ce.output(),
                              [cntk_py.sgdlearner(image_classifier.parameters(),
                                                  learning_rate_per_sample)])

    mb_size = 32
    num_mbs = 100
    minibatch_size_limits = {features_si: (0, mb_size),
                             labels_si: (0, mb_size)}

    for i in range(num_mbs):
        mb = mbs.get_next_minibatch(minibatch_size_limits, cntk_dev)
        arguments = {image_input: mb[features_si].m_data,
                     label_var: mb[labels_si].m_data}
        trainer.train_minibatch(arguments, cntk_dev)
        freq = 20
        if i % freq == 0:
            print(str(i) + ": " + str(get_train_loss(trainer)))
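# The test above assumes a cntk_device helper. A minimal sketch mapping a
# device id to a DeviceDescriptor, consistent with the cntk_py API used in
# these snippets (an assumption, not the test's actual code):
def cntk_device(device_id):
    if device_id == -1:
        return cntk_py.DeviceDescriptor.cpudevice()
    return cntk_py.DeviceDescriptor.gpudevice(device_id)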
def print_training_progress(trainer, mb, frequency, verbose=1):
    training_loss, eval_error = "NA", "NA"
    if mb % frequency == 0:
        training_loss = get_train_loss(trainer)
        eval_error = get_train_eval_criterion(trainer)
        if verbose:
            print("Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}".format(
                mb, training_loss, eval_error))
    return mb, training_loss, eval_error
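# Hedged usage sketch for the helper above: drive it from a training loop and
# keep the returned values for later plotting. `trainer`, `reader`,
# `input_map`, `minibatch_size`, and `num_minibatches_to_train` are assumed to
# exist and are named here only for illustration.
plotdata = {"batchsize": [], "loss": [], "error": []}
for i in range(num_minibatches_to_train):
    data = reader.next_minibatch(minibatch_size, input_map=input_map)
    trainer.train_minibatch(data)
    batchsize, loss, error = print_training_progress(trainer, i,
                                                     frequency=50, verbose=1)
    if not (loss == "NA" or error == "NA"):
        plotdata["batchsize"].append(batchsize)
        plotdata["loss"].append(loss)
        plotdata["error"].append(error)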
def test_simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200
    epoch_size = sys.maxsize
    minibatch_size = 32
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 3
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) / minibatch_size
    lr = cntk_py.learning_rates_per_sample(0.003125)

    input = variable((input_dim,), np.float32, needs_gradient=False,
                     name="features")
    # scale pixel values from [0, 255] into [0, 1] (1/256 = 0.00390625)
    scaled_input = element_times(constant((), 0.00390625), input)
    label = variable((num_output_classes,), np.float32, needs_gradient=False,
                     name="labels")

    dev = cntk_py.DeviceDescriptor.cpudevice()
    netout = fully_connected_classifier_net(scaled_input.output(),
                                            num_output_classes,
                                            hidden_layers_dim,
                                            num_hidden_layers, dev, sigmoid)
    ce = cross_entropy_with_softmax(netout.output(), label)
    pe = classification_error(netout.output(), label)
    ffnet = combine([ce, pe, netout], "classifier_model")

    cm = create_mb_source(input_dim, num_output_classes, epoch_size)
    stream_infos = cm.stream_infos()
    for si in stream_infos:
        if si.m_name == 'features':
            features_si = si
        elif si.m_name == 'labels':
            labels_si = si

    minibatch_size_limits = {features_si: (0, minibatch_size),
                             labels_si: (0, minibatch_size)}
    trainer = cntk_py.Trainer(ffnet, ce.output(),
                              [cntk_py.sgdlearner(ffnet.parameters(), lr)])

    for i in range(int(num_minibatches_to_train)):
        mb = cm.get_next_minibatch(minibatch_size_limits, dev)
        arguments = {input: mb[features_si].m_data,
                     label: mb[labels_si].m_data}
        trainer.train_minibatch(arguments, dev)
        freq = 20
        if i % freq == 0:
            training_loss = get_train_loss(trainer)
            print(str(i) + ": " + str(training_loss))

    # TODO: move the testing code into a separate test module?
    assert np.allclose(training_loss, 0.6142425537109375,
                       atol=TOLERANCE_ABSOLUTE)
acc_all = []
max_acc2 = 0.0  # track the best test accuracy seen so far
TtrainS = datetime.datetime.now()
for epoch in range(1, EPOCH + 1):
    # map the model's input variables to the actual minibatch data for training
    for i in range(int(len(X_train) / minibatch_size)):
        trainer.train_minibatch({
            Xs: X_train[i * minibatch_size:(i + 1) * minibatch_size],
            ys: y_train[i * minibatch_size:(i + 1) * minibatch_size]
        })
    if epoch % SHOW_FREQ == 0:
        cur_loss = get_train_loss(trainer)
        acc = get_train_eval_criterion(trainer)
        # the criterion is an error rate, so accuracy is 1 - error
        print("{}/{}, loss = {}, acc = {}".format(epoch, EPOCH, cur_loss,
                                                  1 - acc))
    if epoch % TEST_FREQ == 0:
        print(1 - trainer.test_minibatch({Xs: X_test1, ys: y_test1}))
        acc2 = 1 - trainer.test_minibatch({Xs: X_test2, ys: y_test2})
        print(acc2)
        acc_all.append(acc2)
        if acc2 > max_acc2:
            max_acc2 = acc2
TtrainE = datetime.datetime.now()
plt.figure(figsize=(15, 9))
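# Hedged completion sketch: the figure opened above presumably plots the test
# accuracies collected in acc_all; one plausible continuation (axis labels and
# legend text are assumptions):
plt.plot(range(TEST_FREQ, EPOCH + 1, TEST_FREQ), acc_all,
         label="test accuracy")
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.legend()
plt.show()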
def train(train_reader, valid_reader, vocab, i2w, model, max_epochs):
    # do some hooks that we won't need in the future
    label_sequence = model.find_by_name('label_sequence')
    decoder_history_hook = model.find_by_name('decoder_history_hook')

    # criterion nodes
    ce = cross_entropy_with_softmax(model, label_sequence)
    errs = classification_error(model, label_sequence)

    def clone_and_hook():
        # network output for decoder history
        net_output = hardmax(model)
        # make a clone of the graph where the ground truth is replaced by the network output
        return model.clone(CloneMethod.share,
                           {decoder_history_hook.output: net_output.output})

    # get a new model that uses the past network output as input to the decoder
    new_model = clone_and_hook()

    # instantiate the trainer object to drive the model training
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    minibatch_size = 72
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(model.parameters, lr_per_sample, momentum_time_constant,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(model, ce, errs, learner)

    # get minibatches of sequences to train with and perform model training
    i = 0
    mbs = 0
    # set epoch size to a larger number for lower training error
    epoch_size = 5000 if isFast else 908241
    training_progress_output_freq = 100

    # bind inputs to data from readers
    train_bind = {
        find_arg_by_name('raw_input', model): train_reader.streams.features,
        find_arg_by_name('raw_labels', model): train_reader.streams.labels
    }
    valid_bind = {
        find_arg_by_name('raw_input', new_model): valid_reader.streams.features,
        find_arg_by_name('raw_labels', new_model): valid_reader.streams.labels
    }

    for epoch in range(max_epochs):
        loss_numer = 0
        metric_numer = 0
        denom = 0

        while i < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size,
                                                   input_map=train_bind)
            trainer.train_minibatch(mb_train)

            # collect epoch-wide stats
            samples = trainer.previous_minibatch_sample_count
            loss_numer += trainer.previous_minibatch_loss_average * samples
            metric_numer += trainer.previous_minibatch_evaluation_average * samples
            denom += samples

            # every N minibatches evaluate on a test sequence to visually show
            # how we're doing; also print training stats
            if mbs % training_progress_output_freq == 0:
                print("Minibatch: {0}, Train Loss: {1:2.3f}, Train Evaluation Criterion: {2:2.3f}".format(
                    mbs, get_train_loss(trainer), get_train_eval_criterion(trainer)))
                mb_valid = valid_reader.next_minibatch(minibatch_size,
                                                       input_map=valid_bind)
                e = new_model.eval(mb_valid)
                print_sequences(e, i2w)

            i += mb_train[find_arg_by_name('raw_labels', model)].num_samples
            mbs += 1

        print("--- EPOCH %d DONE: loss = %f, errs = %f ---" %
              (epoch, loss_numer / denom, 100.0 * (metric_numer / denom)))

    return 100.0 * (metric_numer / denom)
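# The binding dictionaries above rely on a find_arg_by_name helper that these
# snippets never define. A minimal sketch consistent with how it is called
# here (look up a model argument by name, expecting exactly one match):
def find_arg_by_name(name, expression):
    vars = [i for i in expression.arguments if i.name == name]
    assert len(vars) == 1
    return vars[0]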
train_bind = {
    find_arg_by_name('raw_input', model): train_reader.streams.features,
    find_arg_by_name('raw_labels', model): train_reader.streams.labels
}

training_progress_output_freq = 100
max_num_minibatch = 100 if isFast else 1000

for i in range(max_num_minibatch):
    # get next minibatch of training data
    mb_train = train_reader.next_minibatch(minibatch_size,
                                           input_map=train_bind)
    trainer.train_minibatch(mb_train)

    # print training stats periodically
    if i % training_progress_output_freq == 0:
        print("Minibatch: {0}, Train Loss: {1:.3f}, Train Evaluation Criterion: {2:2.3f}".format(
            i, get_train_loss(trainer), get_train_eval_criterion(trainer)))

decoder_history_hook = alias(label_sequence,
                             name='decoder_history_hook')  # copy label_sequence
decoder_input = element_select(is_first_label, label_sentence_start_scattered,
                               past_value(decoder_history_hook))

model = create_model()

# get some references to the new model
label_sequence = model.find_by_name('label_sequence')
decoder_history_hook = model.find_by_name('decoder_history_hook')

# and now replace the output of decoder_history_hook with the hardmax output of the network
def clone_and_hook():
    # network output for decoder history
    net_output = hardmax(model)
    # make a clone of the graph where the ground truth is replaced by the network output
    return model.clone(CloneMethod.share,
                       {decoder_history_hook.output: net_output.output})
def save_metrics(trainer, filename):
    training_loss = get_train_loss(trainer)
    eval_error = get_train_eval_criterion(trainer)
    # use a context manager so the file is flushed and closed
    with open(filename, 'w') as f:
        f.write("Loss: {0:.4f}, Error: {1:.2f}%".format(training_loss,
                                                        eval_error * 100))
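# Hedged usage sketch: persist the final metrics once training has finished.
# `trainer` and the output path are assumptions for illustration only.
save_metrics(trainer, "training_metrics.txt")
with open("training_metrics.txt") as f:
    print(f.read())  # e.g. "Loss: 0.1234, Error: 5.67%"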