def test_focal_loss():
    """Compare ``Cx.focal_loss_with_softmax`` with ``C.cross_entropy_with_softmax``."""
    one_hot = [[0, 0, 1, 0]]

    # alpha=1, gamma=0: both losses are asserted equal to 6 decimals.
    logits = [[1., 2., 3., 4.]]
    soft_labels = [[0.35, 0.15, 0.05, 0.45]]
    cross_entropy = C.cross_entropy_with_softmax(logits, soft_labels).eval()
    focal = Cx.focal_loss_with_softmax(logits, soft_labels, alpha=1, gamma=0).eval()
    np.testing.assert_almost_equal(cross_entropy, focal, decimal=6)

    # gamma=2: focal loss is asserted smaller than cross entropy and pinned
    # to a reference value.
    logits = [[0, 0, 0.8, 0.2]]
    cross_entropy = C.cross_entropy_with_softmax(logits, one_hot).eval()
    focal = Cx.focal_loss_with_softmax(logits, one_hot, gamma=2).eval()
    np.testing.assert_array_less(focal, cross_entropy)
    np.testing.assert_almost_equal(
        focal, np.array([[0.31306446]], dtype=np.float32), decimal=6)

    # Default focal-loss arguments: focal loss is asserted smaller.
    logits = [[0, 0, 0.2, 0.8]]
    cross_entropy = C.cross_entropy_with_softmax(logits, one_hot).eval()
    focal = Cx.focal_loss_with_softmax(logits, one_hot).eval()
    np.testing.assert_array_less(focal, cross_entropy)

    # One very large logit: the two losses are asserted exactly equal.
    logits = [[0, 0, -0.2, 50]]
    cross_entropy = C.cross_entropy_with_softmax(logits, one_hot).eval()
    focal = Cx.focal_loss_with_softmax(logits, one_hot).eval()
    np.testing.assert_equal(cross_entropy, focal)
def test_sequence_unpack_backprop(device_id):
    """Check that two ways of summing a sequence give the same loss and gradients.

    The first criterion uses ``sequence.last(Recurrence(plus)(model))``, the
    second ``sequence.reduce_sum(model)``; loss values and all dense parameter
    gradients are asserted to be close.
    """
    dev = cntk_device(device_id)

    input_vocab_size = 3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2

    x_seq_input = C.sequence.input_variable(
        input_vocab_size, is_sparse=True, name='features')
    label_input = C.input_variable(num_labels, is_sparse=True, name='labels')

    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
        model = C.layers.Recurrence(
            C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    seq1_data = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    seq2_data = [[0, 0, 1], [0, 1, 1]]
    label_data = _to_csr([[0, 1], [1, 0]])

    def _grads_and_loss(criterion):
        # Evaluate parameter gradients and the loss value on the fixed batch.
        feed = {
            x_seq_input: [_to_csr(seq1_data), _to_csr(seq2_data)],
            label_input: label_data,
        }
        return criterion.grad(feed, wrt=criterion.parameters,
                              outputs=[criterion], as_numpy=False)

    # Variant 1: running sum via a recurrence, then take the last step.
    summed_by_recurrence = C.sequence.last(C.layers.Recurrence(C.plus)(model))
    param_grads_1, loss_result_1 = _grads_and_loss(
        C.cross_entropy_with_softmax(summed_by_recurrence, label_input))

    # Variant 2: direct reduction over the sequence axis.
    summed_by_reduce = C.sequence.reduce_sum(model)
    param_grads_2, loss_result_2 = _grads_and_loss(
        C.cross_entropy_with_softmax(summed_by_reduce, label_input))

    assert np.allclose(loss_result_1.asarray(), loss_result_2.asarray())

    for param in param_grads_1:
        if param_grads_1[param].is_sparse:
            # Sparse gradients are not compared.
            continue
        reference_grad_value = param_grads_1[param].asarray()
        grad_value = param_grads_2[param].asarray()
        assert np.allclose(reference_grad_value, grad_value)
def create_criterion(network):
    '''Create the criterion for model.

    Args:
        network: mapping with keys ``'model'``, ``'row_label'`` and
            ``'col_label'``; the model must expose two outputs.

    Returns:
        ``(loss, error)`` where each is the sum over both outputs of the
        softmax cross entropy / classification error against its label.
    '''
    model = network['model']
    label1 = network['row_label']
    label2 = network['col_label']
    row_out, col_out = model.outputs[0], model.outputs[1]

    label1_ce = C.cross_entropy_with_softmax(row_out, label1)
    label2_ce = C.cross_entropy_with_softmax(col_out, label2)
    label1_pe = C.classification_error(row_out, label1)
    label2_pe = C.classification_error(col_out, label2)

    return (label1_ce + label2_ce, label1_pe + label2_pe)
def init_model(m):
    """Build a distributed momentum-SGD trainer for model ``m``.

    Uses the module-level ``labels``, ``BATCHSIZE``, ``EPOCHS``, ``LR`` and
    ``MOMENTUM``.

    Returns:
        ``(trainer, distributed_learner)``.
    """
    printer = cntk.logging.ProgressPrinter(
        freq=int(BATCHSIZE / 2),
        rank=cntk.train.distributed.Communicator.rank(),
        num_epochs=EPOCHS)
    progress_writers = [printer]

    # Loss (dense labels); check if support for sparse labels
    loss = cntk.cross_entropy_with_softmax(m, labels)

    # Momentum SGD
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb
    # unit_gain=False: momentum_direction = momentum*old_momentum_direction + gradient
    # if unit_gain=True then ...(1-momentum)*gradient
    lr_schedule = cntk.learning_rate_schedule(LR, cntk.UnitType.minibatch)
    local_learner = cntk.momentum_sgd(
        m.parameters,
        lr=lr_schedule,
        momentum=cntk.momentum_schedule(MOMENTUM),
        unit_gain=False)
    distributed_learner = \
        cntk.train.distributed.data_parallel_distributed_learner(local_learner)

    criterion = (loss, cntk.classification_error(m, labels))
    trainer = cntk.Trainer(m, criterion, [distributed_learner], progress_writers)
    return trainer, distributed_learner
def create_criterion_function(model, y_pre, labels, self_penalty):
    """Return ``(loss, error metric)`` for predictions ``y_pre`` against ``labels``.

    When ``self_penalty`` is truthy, ``model.p`` (scaled by a coefficient of 1)
    is added to the cross-entropy loss.
    """
    loss = C.cross_entropy_with_softmax(y_pre, labels)
    if self_penalty:
        p_coefficient = 1
        loss += model.p * p_coefficient
    return loss, C.classification_error(y_pre, labels)
def criterion(input, labels):
    """Return ``(cross entropy, classification error)`` for ``model`` on a label sequence."""
    # The criterion must drop the leading <s> from the labels:
    # <s> A B C </s> --> A B C </s>
    postprocessed_labels = sequence.slice(labels, 1, 0)
    z = model(input, postprocessed_labels)
    return (
        cross_entropy_with_softmax(z, postprocessed_labels),
        classification_error(z, postprocessed_labels),
    )
def run_cntk():
    """Build, save, reload and train the CNTK character model, generating text as it goes."""
    text, chars, char_indices, x_train, y_train = get_data(
        one_hot_encode_features=False)
    alphabet_size = len(chars)
    print('alphabet_size=', alphabet_size)

    # Round-trip the freshly built model through disk before training.
    model = build_model_cntk(alphabet_size=alphabet_size)
    model_filename = 'ch8-1_cntk.model'
    model.save(model_filename)
    model = None
    model = cntk.load_model(model_filename)

    x = cntk.sequence.input_variable(shape=(), dtype=np.float32)
    y = cntk.input_variable(shape=(), dtype=np.float32)
    model.replace_placeholders({model.placeholders[0]: x})

    y_oneHot = cntk.one_hot(y, num_classes=alphabet_size)
    loss_function = cntk.cross_entropy_with_softmax(model.output, y_oneHot)

    lr_schedule = cntk.learning_parameter_schedule_per_sample(0.001)
    momentum_schedule = cntk.learning_parameter_schedule_per_sample(0.9)
    learner = cntk.adam(model.parameters, lr_schedule, momentum_schedule)
    # The loss doubles as the evaluation metric.
    trainer = cntk.Trainer(model, (loss_function, loss_function), [learner],)

    # NOTE(review): the save and text generation are assumed to run once per
    # epoch (inside the loop) -- confirm against the original layout.
    for epoch in range(1, 60):
        print('epoch', epoch)
        cntk_train(x, y, x_train, y_train,
                   max_epochs=32, batch_size=128, trainer=trainer)
        model_filename = 'final_ch8-1_cntk.model'
        model.save(model_filename)
        generate_text_cntk(char_indices, chars, model, text)
def test_learner_logging():
    """Check that a progress writer logs the learning-rate and momentum values in order."""
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    lr_schedule = learning_rate_schedule(lr_values, UnitType.sample, 1)
    momentum = C.momentum_schedule(m_values, 1)
    learner = C.momentum_sgd(z.parameters, lr_schedule, momentum)
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for _ in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})

    assert len(writer.log_output) == len(lr_values + m_values)

    # Expected log: lr and momentum values interleaved, then the final lr of 0.
    values = [item for pair in zip(lr_values, m_values) for item in pair] + [0]
    for index, expected in enumerate(values):
        assert (expected == writer.log_output[index])
def create_binary_convolution_model():
    """Build the convolutional classifier and return ``C.combine([z, ce, pe])``.

    Uses the module-level ``num_channels``, ``image_height``, ``image_width``
    and ``num_classes``.
    """
    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    # Two identical conv / pool / batch-norm stages with 128 filters each.
    for _ in range(2):
        z = C.layers.Convolution((3, 3), 128, pad=True)(z)
        z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
        z = C.layers.BatchNormalization(map_rank=1)(z)

    # 1x1 convolution to one map per class, then average over the spatial axes.
    z = C.layers.Convolution((1, 1), num_classes, pad=True)(z)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes, ))

    # Learnable per-class scale applied to the outputs.
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)
    return C.combine([z, ce, pe])
def cross_entropy_with_softmax(target_vector, output_vector, name=''):
    r'''
    This operation computes the cross entropy over the softmax of the `output_vector`.
    It expects the `output_vector` as unscaled, and it computes softmax over
    the `output_vector` internally. Any `output_vector` input over which softmax is
    already computed before passing to this operator will be incorrect.

    :math:`cross\_entropy\_with\_softmax(t, o) = {-{\sum_{i \in \{1,len(t)\}} t_i \log(softmax(o_i)) }}`

    Example:
        >>> C.eval(C.cross_entropy_with_softmax([0., 0., 0., 1.], [1., 1., 1., 50.]))
        #[0.]

        >>> C.eval(C.cross_entropy_with_softmax([0.35, 0.15, 0.05, 0.45], [1., 2., 3., 4.]))
        #[1.84]

    Args:
        target_vector: usually it is one-hot vector where the hot bit corresponds to the label index.
            But it can be any probability distribution over the labels.
        output_vector: the unscaled computed output values from the network
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    # The docstring is a raw string: it contains LaTeX backslashes (``\_``,
    # ``\sum``, ``\in``, ``\log``) that are invalid escape sequences in a
    # normal string literal.

    # Imported under an alias so the underlying operator is not confused with
    # this wrapper of the same name.
    from cntk import cross_entropy_with_softmax as _cntk_cross_entropy_with_softmax

    # Each argument is sanitized using the data type of the other one.
    target_vector = sanitize_input(target_vector, get_data_type(output_vector))
    output_vector = sanitize_input(output_vector, get_data_type(target_vector))
    return _cntk_cross_entropy_with_softmax(
        target_vector, output_vector, name).output()
def trainDNN(trainX, trainY):
    """Train a two-class DNN classifier with plain SGD.

    Args:
        trainX: pandas object holding the features (one row per sample).
        trainY: pandas object holding the 0/1 labels.

    Returns:
        ``[classifier, trainer, input_variable, label_variable]``.
    """
    numOutputClasses = 2

    # Add a complementary column (1 where the label is 0) so each row becomes
    # a two-column label vector.
    newCol = pd.DataFrame(np.where(trainY == 0, 1, 0))
    trainY = trainY.reset_index(drop=True)
    trainY = pd.concat([trainY, newCol], axis=1, ignore_index=True)

    inputDim = trainX.shape[1]
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    trainX = np.ascontiguousarray(trainX.values.astype(np.float32))
    trainY = np.ascontiguousarray(trainY.values.astype(np.float32))

    featureVar = C.input_variable(inputDim)
    labelVar = C.input_variable(numOutputClasses)
    classifier = create_model(featureVar)

    loss = C.cross_entropy_with_softmax(classifier, labelVar)
    evalError = C.classification_error(classifier, labelVar)

    learning_rate = 0.5
    lrSchedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(classifier.parameters, lrSchedule)
    trainer = C.Trainer(classifier, (loss, evalError), [learner])

    minibatchSize = 25
    # Only whole minibatches are trained; a trailing partial batch is dropped.
    numMinibatchesToTrain = trainX.shape[0] // minibatchSize

    # train the model
    for _ in range(numMinibatchesToTrain):
        # getMinibatch hands back the (updated) data along with the batch.
        trainX, trainY, features, labels = getMinibatch(
            trainX, trainY, minibatchSize)
        trainer.train_minibatch({featureVar: features, labelVar: labels})

    return [classifier, trainer, featureVar, labelVar]
def test_usermbsource_training(tmpdir):
    """Run a training session fed by ``MyDataSource`` and check the sample count."""
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
        classification_error, learning_rate_schedule, sgd, Trainer, \
        training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim, ))
    label = C.input_variable(shape=(num_output_classes, ))

    # Model: sum the sequence, then one linear projection.
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    lr_per_sample = learning_rate_schedule(
        [0.3, 0.2, 0.1, 0.0], UnitType.sample)
    trainer = Trainer(z, (ce, errs), [sgd(z.parameters, lr_per_sample)])

    session = training_session(
        trainer=trainer,
        mb_source=mbs,
        model_inputs_to_streams={feature: mbs.fsi, label: mbs.lsi},
        mb_size=4,
        max_samples=20)
    session.train()

    assert trainer.total_number_of_samples_seen == 20
def create_network(para, verbose=False, num_minibatches=20000, minibatch_size=100):
    """Build and train a small convolutional classifier; return its trainer.

    Uses the module-level ``network_input``, ``network_label``,
    ``train_reader`` and ``mapping``.

    Args:
        para: indexable with four entries -- the filter counts of the three
            convolutions followed by the width of the hidden dense layer.
        verbose: when true, log progress through a ``ProgressPrinter``.
        num_minibatches: number of training minibatches (default 20000, the
            value previously hard-coded).
        minibatch_size: samples requested per minibatch (default 100, the
            value previously hard-coded).

    Returns:
        The ``cntk.Trainer`` after training.
    """
    with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.ops.relu):
        # In order to accelerate the debugging step, we choose a simple
        # structure with only 2 parameters
        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[0],
                                      strides=(1, 1), pad=True, name='C1')(network_input / 255.0)
        h = cntk.layers.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2), )(h)
        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[1],
                                      strides=(1, 1), pad=True, name='C2')(h)
        h = cntk.layers.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2))(h)
        # NOTE(review): this layer reuses the name 'C2'; kept as-is because
        # renaming it would change node names that callers may look up.
        h = cntk.layers.Convolution2D(filter_shape=(3, 3), num_filters=para[2],
                                      strides=(1, 1), pad=True, name='C2')(h)
        h = cntk.layers.Dense(para[3])(h)
        h = cntk.layers.Dropout(0.25)(h)
        z = cntk.layers.Dense(10, activation=None, name='R')(h)

    loss = cntk.cross_entropy_with_softmax(z, network_label)
    label_error = cntk.classification_error(z, network_label)

    lr_schedule = cntk.learning_rate_schedule(0.1, cntk.UnitType.minibatch)
    learner = cntk.momentum_sgd(z.parameters, lr_schedule, cntk.momentum_schedule(0.9))
    trainer = cntk.Trainer(z, (loss, label_error), [learner])

    log = cntk.logging.ProgressPrinter(100) if verbose else None

    # ``range`` instead of the Python-2-only ``xrange``.
    for _ in range(num_minibatches):
        data = train_reader.next_minibatch(
            minibatch_size, input_map=mapping(train_reader))
        trainer.train_minibatch(data)
        if log is not None:
            log.update_with_trainer(trainer)

    return trainer
def train_classifier(autoencoder_definition: Autoencoder):
    """Train a classifier on top of the frozen encoder and save the full model.

    Returns:
        The trainer returned by ``train``.
    """
    # Get the encoded layer, freeze its weights, add the classification and
    # fine tune layers and train again
    encoded_model = autoencoder_definition.encoded_model
    feature_node = find_by_name(encoded_model, 'features')
    cloned_layers = C.combine([encoded_model]).clone(
        CloneMethod.freeze, {feature_node: features})

    full_model = autoencoder_definition.classifier(cloned_layers)

    # Needs GraphViz. Ensure to add the path of Graphviz
    # (anaconda folder/envs/'environment name'/library/bin/graphviz) into the
    # system environment variables
    # plot_path = "full_model.png"
    # plot(full_model, plot_path)

    reader_train = create_reader(train_file, True, input_dim, num_output_classes)

    # Train Classifier
    # Instantiate the loss and error function.
    loss_function = C.cross_entropy_with_softmax(full_model, labels)
    error_function = C.classification_error(full_model, labels)

    stream_map = {
        labels: reader_train.streams.labels,
        features: reader_train.streams.features,
    }

    trainer = train(
        reader=reader_train,
        model=full_model,
        loss_function=loss_function,
        error_function=error_function,
        input_map=stream_map,
        num_sweeps_to_train_with=100,
        num_samples_per_sweep=2000,
        minibatch_size=80,
        learning_rate=0.02)

    full_model.save('full_model.model')
    return trainer
def create_resnet_network(network_name):
    """Create a CIFAR-10 ResNet together with its loss and error nodes.

    Args:
        network_name: ``'resnet20'`` or ``'resnet110'``.

    Returns:
        dict with keys ``'name'``, ``'feature'``, ``'label'``, ``'ce'``,
        ``'pe'`` and ``'output'``.

    Raises:
        RuntimeError: if ``network_name`` is not a known model name.
    """
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
    else:
        # Raise (the original returned the exception object, which callers
        # would then have mistaken for a network dict).
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    return {
        'name' : network_name,
        'feature': input_var,
        'label': label_var,
        'ce' : ce,
        'pe' : pe,
        'output': z
    }
def create_resnet_network(network_name):
    """Create a CIFAR-10 ResNet together with its loss and error nodes.

    Args:
        network_name: ``'resnet20'`` or ``'resnet110'``.

    Returns:
        dict with keys ``'name'``, ``'feature'``, ``'label'``, ``'ce'``,
        ``'pe'`` and ``'output'``.

    Raises:
        RuntimeError: if ``network_name`` is not a known model name.
    """
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
    else:
        # Raise (the original returned the exception object, which callers
        # would then have mistaken for a network dict).
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    return {
        'name': network_name,
        'feature': input_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'output': z
    }
def seqcla():
    """Train an LSTM sequence classifier, write its predictions and check the accuracy."""
    # LSTM params
    input_dim = 50
    output_dim = 128
    cell_dim = 128

    # model
    num_labels = 5
    vocab = 2000
    embed_dim = 50

    t = C.dynamic_axis(name='t')
    features = C.sparse_input(vocab, dynamic_axis=t, name='features')
    labels = C.input(num_labels, name='labels')

    train_reader = C.CNTKTextFormatReader(train_file)

    # setup embedding matrix
    embedding = C.parameter((embed_dim, vocab),
                            learning_rate_multiplier=0.0,
                            init_from_file_path=embedding_file)

    # get the vector representing the word
    sequence = C.times(embedding, features, name='sequence')

    # add an LSTM layer
    L = lstm_layer(output_dim, cell_dim, sequence, input_dim)

    # add a softmax layer on top
    w = C.parameter((num_labels, output_dim), name='w')
    b = C.parameter((num_labels), name='b')
    z = C.times(w, L) + b
    z.name = 'z'
    z.tag = "output"

    # and reconcile the shared dynamic axis
    pred = C.reconcile_dynamic_axis(z, labels, name='pred')

    ce = C.cross_entropy_with_softmax(labels, pred)
    ce.tag = "criterion"

    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=10,
                         learning_rates_per_mb=0.1, max_epochs=3)

    def _reader_map():
        # Build a fresh reader mapping for the feature and label streams.
        return train_reader.map(
            features, alias='x', dim=vocab, format='Sparse').map(
            labels, alias='y', dim=num_labels, format='Dense')

    with C.LocalExecutionContext('seqcla') as ctx:
        # train the model
        ctx.train(root_nodes=[ce], training_params=my_sgd,
                  input_map=_reader_map())

        # write out the predictions
        ctx.write(input_map=_reader_map())

        # do some manual accuracy testing
        acc = calc_accuracy(train_file, ctx.output_filename_base)

        # and test for the same number...
        TOLERANCE_ABSOLUTE = 1E-02
        assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
def criterion(input:InputSequence[C.layers.Tensor[input_vocab_dim]], labels:LabelSequence[C.layers.Tensor[label_vocab_dim]]):
    """Return ``(cross entropy, classification error)`` of ``model`` on the label sequence."""
    # Drop the first label element: <s> A B C </s> --> A B C </s>
    postprocessed_labels = C.sequence.slice(labels, 1, 0)
    z = model(input, postprocessed_labels)
    return (
        C.cross_entropy_with_softmax(z, postprocessed_labels),
        C.classification_error(z, postprocessed_labels),
    )
def createDecoderNetwork(self, networkHiddenSrc, srcLength, trgLength):
    """Unroll the decoder for ``trgLength`` steps and return the summed masked cross entropy.

    Step 0 is initialised from a slice of the first source hidden state; later
    steps use ``createDecoderRNNNetwork`` with the previous decoder state.
    """
    firstStepHidden = C.slice(networkHiddenSrc, 0, 0, 1)
    srcSentEmb = C.slice(firstStepHidden, -1,
                         Config.SrcHiddenSize, Config.SrcHiddenSize * 2)

    inputTrg = C.reshape(
        self.inputMatrixTrg,
        shape=(Config.TrgMaxLength, Config.BatchSize, Config.TrgVocabSize))

    networkHiddenTrg = {}
    attProbAll = []
    tce = 0
    for step in range(0, trgLength, 1):
        if step == 0:
            preTrgEmb = self.initTrgEmb
            networkHiddenTrg[step] = self.createDecoderInitNetwork(srcSentEmb)
        else:
            preTrgEmb = self.EmbTrg(inputTrg[step - 1])
            (networkHiddenTrg[step], attProb) = self.createDecoderRNNNetwork(
                networkHiddenSrc, preTrgEmb, networkHiddenTrg[step - 1], srcLength)
            # Collect the attention probabilities of every step along axis 0.
            if step == 1:
                attProbAll = attProb
            else:
                attProbAll = C.splice(attProbAll, attProb, axis=0)

        preSoftmax = self.createReadOutNetwork(networkHiddenTrg[step], preTrgEmb)
        stepCe = C.cross_entropy_with_softmax(preSoftmax, inputTrg[step], 2)
        stepCe = C.reshape(stepCe, shape=(1, Config.BatchSize))
        # Weight the per-sample loss by this step's target mask.
        tce += C.times_transpose(stepCe, self.maskMatrixTrg[step])
    return tce
def train_lm(testing=False):
    """Train the language model and return the last measured average cross entropy.

    Args:
        testing: when true, no per-epoch model files are written.
    """
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes
    # z: the input to softmax that provides the latent representation of the next token
    # cross_entropy: this is used training criterion
    # error: this a binary indicator if the model predicts the correct token
    z, cross_entropy, error = create_model(
        input_sequence, label_sequence, data.vocab_dim, hidden_dim)

    # For measurement we use the (build in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # print out some useful training information
    log_number_of_parameters(z)
    print()

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_parameter_schedule_per_sample(learning_rate)
    momentum_schedule = C.momentum_schedule_per_sample(momentum_per_sample)
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(
        z.parameters, lr_schedule, momentum_schedule,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (cross_entropy, error), learner)

    # Run the training loop
    num_trained_samples = 0
    num_trained_samples_since_last_report = 0
    last_avg_ce = 0
    for epoch_count in range(num_epochs):
        batches = data.minibatch_generator(
            train_file_path, sequence_length, sequences_per_batch)
        for features, labels, token_count in batches:
            arguments = ({input_sequence: features, label_sequence: labels})

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end = timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report
            # samples (and once before anything has been counted).
            report_due = (
                num_trained_samples_since_last_report >= num_samples_between_progress_report
                or num_trained_samples == 0)
            if report_due:
                av_ce = average_cross_entropy(
                    full_ce, input_sequence, label_sequence, data)
                print_progress(samples_per_second, av_ce, num_trained_samples, t_start)
                num_trained_samples_since_last_report = 0
                last_avg_ce = av_ce

            num_trained_samples += token_count
            num_trained_samples_since_last_report += token_count

        if not testing:
            # after each epoch save the model
            model_filename = "models/lm_epoch%d.dnn" % epoch_count
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

    return last_avg_ce
def create_network():
    """Build the frame-difference ResNet classifier with its loss and metric.

    Returns:
        dict with keys ``'input'``, ``'target'``, ``'model'``, ``'loss'`` and
        ``'metric'``.
    """
    # Create the input and target variables
    input_var = cntk.input_variable(
        (sequence_length, frame_height, frame_width), name='input_var')
    target_var = cntk.input_variable(
        (num_classes, ), is_sparse=True, name='target_var')

    # Difference between consecutive frames: slice [1, 20) minus slice [0, 19)
    # along axis 0.
    input_head = cntk.slice(input_var, axis=0, begin_index=0, end_index=19)
    input_tail = cntk.slice(input_var, axis=0, begin_index=1, end_index=20)
    diff = input_tail - input_head

    layers = Sequential([
        resnet_model(cntk.placeholder()),
        Label('resnet'),
        Dense(num_classes, name='output')
    ])
    model = layers(diff)

    return {
        'input': input_var,
        'target': target_var,
        'model': model,
        'loss': cntk.cross_entropy_with_softmax(model, target_var),
        'metric': cntk.classification_error(model, target_var)
    }
def seqcla():
    """Train an LSTM sequence classifier, write its predictions and check the accuracy."""
    # LSTM params
    input_dim = 50
    output_dim = 128
    cell_dim = 128

    # model
    num_labels = 5
    vocab = 2000
    embed_dim = 50

    t = C.dynamic_axis(name='t')
    # temporarily using cntk1 SparseInput because cntk2's Input() will simply
    # allow sparse as a parameter
    features = cntk1.SparseInput(vocab, dynamicAxis=t, name='features')
    labels = C.input(num_labels, name='labels')

    train_reader = C.CNTKTextFormatReader(train_file)

    # setup embedding matrix
    embedding = C.parameter((embed_dim, vocab),
                            learning_rate_multiplier=0.0,
                            init_from_file_path=embedding_file)

    # get the vector representing the word
    sequence = C.times(embedding, features, name='sequence')

    # add an LSTM layer
    L = lstm_layer(output_dim, cell_dim, sequence, input_dim)

    # add a softmax layer on top
    w = C.parameter((num_labels, output_dim), name='w')
    b = C.parameter((num_labels), name='b')
    z = C.plus(C.times(w, L), b, name='z')
    z.tag = "output"

    # and reconcile the shared dynamic axis
    pred = C.reconcile_dynamic_axis(z, labels, name='pred')

    ce = C.cross_entropy_with_softmax(labels, pred)
    ce.tag = "criterion"

    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=10,
                         learning_rates_per_mb=0.1, max_epochs=3)

    def _reader_map():
        # Build a fresh reader mapping for the feature and label streams.
        return train_reader.map(
            features, alias='x', dim=vocab, format='Sparse').map(
            labels, alias='y', dim=num_labels, format='Dense')

    with C.LocalExecutionContext('seqcla') as ctx:
        # train the model
        ctx.train(root_nodes=[ce], training_params=my_sgd,
                  input_map=_reader_map())

        # write out the predictions
        ctx.write(input_map=_reader_map())

        # do some manual accuracy testing
        acc = calc_accuracy(train_file, ctx.output_filename_base)

        # and test for the same number...
        TOLERANCE_ABSOLUTE = 1E-02
        assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
def cross_entropy_with_softmax(output_vector, target_vector, name=''):
    r'''
    This operation computes the cross entropy over the softmax of the `output_vector`.
    It expects the `output_vector` as unscaled, and it computes softmax over
    the `output_vector` internally. Any `output_vector` input over which softmax is
    already computed before passing to this operator will be incorrect.

    :math:`cross\_entropy\_with\_softmax(o, t) = {-{\sum_{i \in \{1,len(t)\}} t_i \log(softmax(o_i)) }}`

    Example:
        >>> C.eval(C.cross_entropy_with_softmax([1., 1., 1., 50.], [0., 0., 0., 1.]))
        #[0.]

        >>> C.eval(C.cross_entropy_with_softmax([1., 2., 3., 4.], [0.35, 0.15, 0.05, 0.45]))
        #[1.84]

    Args:
        output_vector: the unscaled computed output values from the network
        target_vector: usually it is one-hot vector where the hot bit corresponds to the label index.
            But it can be any probability distribution over the labels.
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    # The docstring is a raw string: it contains LaTeX backslashes (``\_``,
    # ``\sum``, ``\in``, ``\log``) that are invalid escape sequences in a
    # normal string literal.

    # Imported under an alias so the underlying operator is not confused with
    # this wrapper of the same name.
    from cntk import cross_entropy_with_softmax as _cntk_cross_entropy_with_softmax

    # Each argument is sanitized using the data type of the other one.
    output_vector = sanitize_input(output_vector, get_data_type(target_vector))
    target_vector = sanitize_input(target_vector, get_data_type(output_vector))
    return _cntk_cross_entropy_with_softmax(
        output_vector, target_vector, name).output()
def criterion(data, label_one_hot):
    """Return ``(loss, metric)`` of ``model`` applied to ``data``."""
    # apply model. Computes a non-normalized log probability for every output class.
    z = model(data)
    # this applies softmax to z under the hood
    loss = C.cross_entropy_with_softmax(z, label_one_hot)
    return loss, C.classification_error(z, label_one_hot)
def criterion(input: InputSequence[C.layers.Tensor[input_vocab_dim]],
              labels: LabelSequence[C.layers.Tensor[label_vocab_dim]]):
    """Return ``(cross entropy, classification error)`` of ``model`` on the label sequence."""
    # Drop the first label element: <s> A B C </s> --> A B C </s>
    postprocessed_labels = C.sequence.slice(labels, 1, 0)
    z = model(input, postprocessed_labels)
    return (
        C.cross_entropy_with_softmax(z, postprocessed_labels),
        C.classification_error(z, postprocessed_labels),
    )
def test_htk_deserializers():
    """Smoke-test training from HTK feature/MLF deserializers.

    The test passes if three minibatches can be read and trained on without
    an exception. The working directory is switched to ``data_path`` for the
    duration of the test and always restored to ``abs_path`` afterwards.
    """
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)
    try:
        features_file = "glob_0000.scp"
        labels_file = "glob_0000.mlf"
        label_mapping_file = "state.list"

        fd = HTKFeatureDeserializer(
            StreamDefs(amazing_features=StreamDef(
                shape=feature_dim, context=(context, context),
                scp=features_file)))

        ld = HTKMLFDeserializer(
            label_mapping_file,
            StreamDefs(
                awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

        reader = MinibatchSource([fd, ld])

        # Input width is the frame plus ``context`` frames on each side.
        features = C.input_variable(((2 * context + 1) * feature_dim))
        labels = C.input_variable((num_classes))

        model = Sequential(
            [For(range(3), lambda: Recurrence(LSTM(256))), Dense(num_classes)])
        z = model(features)
        ce = C.cross_entropy_with_softmax(z, labels)
        errs = C.classification_error(z, labels)

        learner = C.adam_sgd(
            z.parameters,
            lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
            momentum=C.momentum_as_time_constant_schedule(1000),
            low_memory=True,
            gradient_clipping_threshold_per_sample=15,
            gradient_clipping_with_truncation=True)
        trainer = C.Trainer(z, (ce, errs), learner)

        input_map = {
            features: reader.streams.amazing_features,
            labels: reader.streams.awesome_labels
        }

        pp = C.ProgressPrinter(freq=0)

        # just run and verify it doesn't crash
        for _ in range(3):
            mb_data = reader.next_minibatch(mbsize, input_map=input_map)
            trainer.train_minibatch(mb_data)
            pp.update_with_trainer(trainer, with_metric=True)
    finally:
        # Restore the working directory even when the test fails, so later
        # tests are not run from the data directory.
        os.chdir(abs_path)
def create_resnet_network(network_name, fp16):
    """Create an ImageNet ResNet together with its loss and error nodes.

    Args:
        network_name: one of ``'resnet18'``, ``'resnet34'``, ``'resnet50'``,
            ``'resnet101'``, ``'resnet152'``.
        fp16: when true, the graph is built in float16 (inputs are cast down
            and the loss/metrics are cast back to float32).

    Returns:
        dict with keys ``'name'``, ``'feature'``, ``'label'``, ``'ce'``,
        ``'errs'``, ``'top5Errs'`` and ``'output'``.

    Raises:
        RuntimeError: if ``network_name`` is not a known model name.
    """
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        stride1x1 = (1, 1)
        stride3x3 = (2, 2)

        # create model, and configure learning parameters
        if network_name == 'resnet18':
            z = create_imagenet_model_basic(graph_input, [2, 1, 1, 2], num_classes)
        elif network_name == 'resnet34':
            z = create_imagenet_model_basic(graph_input, [3, 3, 5, 2], num_classes)
        elif network_name == 'resnet50':
            z = create_imagenet_model_bottleneck(
                graph_input, [2, 3, 5, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet101':
            z = create_imagenet_model_bottleneck(
                graph_input, [2, 3, 22, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet152':
            z = create_imagenet_model_bottleneck(
                graph_input, [2, 7, 35, 2], num_classes, stride1x1, stride3x3)
        else:
            # Raise (the original returned the exception object instead).
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        errs = classification_error(z, graph_label, topN=1)
        # Top-5 error must look at the five best predictions (was topN=1,
        # which made it identical to ``errs``).
        top5Errs = classification_error(z, graph_label, topN=5)

    if fp16:
        ce = C.cast(ce, dtype=np.float32)
        errs = C.cast(errs, dtype=np.float32)
        top5Errs = C.cast(top5Errs, dtype=np.float32)

    return {
        'name': network_name,
        'feature': input_var,
        'label': label_var,
        'ce': ce,
        'errs': errs,
        'top5Errs': top5Errs,
        'output': z
    }
def create_network(feature_dim = 40, num_classes=256, feature_mean_file=None, feature_inv_stddev_file=None,
                   feature_norm_files = None, label_prior_file = None, context=(0,0), model_type=None):
    """Build a DNN or BLSTM acoustic model with its loss, metric and scaled log-likelihood.

    Args:
        feature_dim: dimension of a single feature frame.
        num_classes: number of output classes.
        feature_mean_file: file read with ``load_ascii_vector`` for the feature mean.
        feature_inv_stddev_file: file read with ``load_ascii_vector`` for the
            inverse standard deviation.
        feature_norm_files: not used by this function.
        label_prior_file: file read with ``load_ascii_vector`` for the label prior.
        context: ``(left, right)`` number of context frames per input.
        model_type: ``"DNN"`` or ``"BLSTM"``.

    Returns:
        dict with keys ``'feature'``, ``'label'``, ``'output'``,
        ``'ScaledLogLikelihood'``, ``'ce'``, ``'pe'`` and ``'final_hidden'``.

    Raises:
        RuntimeError: if ``model_type`` is neither ``"DNN"`` nor ``"BLSTM"``.
    """
    frames_per_input = 1 + context[0] + context[1]

    def MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file):
        # Returns a function computing (x - mean) * inv_stddev per frame.
        m = C.reshape(load_ascii_vector(feature_mean_file, 'feature_mean'),
                      shape=(1, feature_dim))
        s = C.reshape(load_ascii_vector(feature_inv_stddev_file, 'feature_invstddev'),
                      shape=(1, feature_dim))

        def _func(operand):
            per_frame = C.reshape(operand, shape=(frames_per_input, feature_dim))
            normalized = C.element_times(per_frame - m, s)
            return C.reshape(normalized, shape=operand.shape)
        return _func

    def MyDNNLayer(hidden_size=128, num_layers=2):
        # Stack of sigmoid dense layers.
        return C.layers.Sequential([
            C.layers.For(range(num_layers),
                         lambda: C.layers.Dense(hidden_size, activation=C.sigmoid))
        ])

    def MyBLSTMLayer(hidden_size=128, num_layers=2):
        # Bidirectional LSTM stack built on ``C.optimized_rnnstack``.
        W = C.Parameter((C.InferredDimension, hidden_size),
                        init=C.he_normal(1.0), name='rnn_parameters')

        def _func(operand):
            return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size,
                                        num_layers=num_layers, bidirectional=True,
                                        recurrent_op='lstm')
        return _func

    # Input variables denoting the features and label data
    feature_var = C.sequence.input_variable(feature_dim * frames_per_input)
    label_var = C.sequence.input_variable(num_classes)

    feature_norm = MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file)(feature_var)
    label_prior = load_ascii_vector(label_prior_file, 'label_prior')
    log_prior = C.log(label_prior)

    if model_type == "DNN":
        net = MyDNNLayer(512, 4)(feature_norm)
    elif model_type == "BLSTM":
        net = MyBLSTMLayer(512, 2)(feature_norm)
    else:
        raise RuntimeError("model_type must be DNN or BLSTM")

    out = C.layers.Dense(num_classes, init=C.he_normal(scale=1/3))(net)

    # loss and metric
    ce = C.cross_entropy_with_softmax(out, label_var)
    pe = C.classification_error(out, label_var)
    ScaledLogLikelihood = C.minus(out, log_prior, name='ScaledLogLikelihood')

    # talk to the user
    C.logging.log_number_of_parameters(out)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'output': out,
        'ScaledLogLikelihood': ScaledLogLikelihood,
        'ce': ce,
        'pe': pe,
        'final_hidden': net  # adding last hidden layer output for future use in CTC tutorial
    }
def train(nonlinearity, num_hidden_layers, device_id, minibatch_size=10, num_samples=1000):
    """Train a fully connected classifier on random data and collect its progress.

    Returns:
        ``(losses, errors)``: the values reported by
        ``print_training_progress``, skipping entries reported as ``"NA"``.
    """
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)

    loss_fn = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss_fn, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)
    training_progress_output_freq = 20

    losses, errors = [], []
    for step in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels},
                                device=cntk_device(device_id))

        batchsize, step_loss, step_error = print_training_progress(
            trainer, step, training_progress_output_freq)

        if step_loss == "NA" or step_error == "NA":
            continue
        losses.append(step_loss)
        errors.append(step_error)

    return losses, errors
def create_model(self):
    """Build (or load) the direction/velocity network and its trainer.

    Returns:
        ``(model, loss, learner, trainer)``.
    """
    hidden_layers = [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 16, 32]

    # Join input and target and insert a singleton middle axis.
    first_input = C.ops.splice(self._input, self._target)
    first_input_size = first_input.shape
    first_input = C.ops.reshape(
        first_input, (first_input_size[0], 1, first_input_size[1]))

    def _conv(num_filters):
        # (1, 3) tanh convolution used for every layer of the stack.
        return C.layers.Convolution2D((1, 3), num_filters=num_filters, pad=True,
                                      reduction_rank=1, activation=C.ops.tanh)

    model = _conv(8)(first_input)
    print(model)

    for h in hidden_layers:
        # Every layer also sees the original input again.
        input_new = C.ops.splice(model, first_input, axis=0)
        model = _conv(h)(input_new)
        print(model)

    ######
    #model = C.ops.splice(model, self._target)

    # Dense layers
    direction = C.layers.Sequential([
        C.layers.Dense(720, activation=C.ops.relu),
        C.layers.Dense(360, activation=None)
    ])(model)

    velocity = C.layers.Sequential([
        C.layers.Dense(128, activation=C.ops.relu),
        C.layers.Dense(64, activation=None),
        C.layers.Dense(1, activation=None)
    ])(model)

    model = C.ops.splice(direction, velocity)

    if self._load_model:
        # Replace the freshly built graph with the stored one and re-derive
        # the two heads from its output.
        model = C.load_model(self._file_name)
        direction = model[0:360]
        velocity = model[360]

    C.logging.log_number_of_parameters(model)
    print(model)

    #loss = C.squared_error(direction, self._output) + C.squared_error(velocity, self._output_velocity)
    #error = C.squared_error(direction, self._output) + C.squared_error(velocity, self._output_velocity)

    velocity_loss = C.squared_error(velocity, self._output_velocity)
    loss = C.cross_entropy_with_softmax(direction, self._output) + velocity_loss
    velocity_error = C.squared_error(velocity, self._output_velocity)
    error = C.classification_error(direction, self._output) + velocity_error

    learner = C.adadelta(model.parameters, l2_regularization_weight=0.001)
    progress_printer = C.logging.ProgressPrinter(tag='Training')
    trainer = C.Trainer(model, (loss, error), learner, progress_printer)

    return model, loss, learner, trainer
def fineTuneModel(folder_with_data, path_to_label_csv="label.csv",
                  original_model_path="../vgg13.model", max_epochs=10):
    """Fine-tune a cloned model on the data found in ``folder_with_data``.

    Args:
        folder_with_data: Forwarded to ``getData``.
        path_to_label_csv: Forwarded to ``getData``.
        original_model_path: Model file handed to ``clone_model``.
        max_epochs: Number of passes over the training data.

    Side effects:
        Prints progress per epoch and saves the tuned model to
        ``"../vgg13.model"``.
        NOTE(review): that path equals the default ``original_model_path``,
        so the source model is overwritten -- confirm this is intended.
    """
    trainingValues = getData(folder_with_data, path_to_label_csv)

    input_var = ct.input((1, height, width), np.float32)
    label_var = ct.input((num_classes), np.float32)

    print("cloning old model")
    z = clone_model(original_model_path, input_var)
    loss = ct.cross_entropy_with_softmax(z, label_var)
    metric = ct.classification_error(z, label_var)

    minibatch_size = 32
    epoch_size = trainingValues.getLengthOfData()

    # Full learning rate for the first ten epochs, half of it afterwards.
    lr_per_minibatch = [learning_rate] * 10 + [learning_rate / 2.0]
    # Time constant equivalent to a per-minibatch momentum of 0.9.
    mm_time_constant = -minibatch_size / np.log(0.9)
    lr_schedule = ct.learning_rate_schedule(
        lr_per_minibatch, unit=ct.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule = ct.momentum_as_time_constant_schedule(mm_time_constant)

    learner = ct.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    trainer = ct.Trainer(z, (loss, metric), learner)
    print("created trainer and learner")

    print("training started")
    # The original looped on ``while epoch < max_epochs`` without ever
    # initialising ``epoch``; because ``epoch += 1`` makes the name local, that
    # raised UnboundLocalError on the first comparison.
    for _epoch in range(max_epochs):
        trainingValues.reset()
        start_time = time.time()
        training_accuracy = 0

        # Mini-batch learning: consume the data source until it is exhausted.
        while trainingValues.hasMoreMinibatches():
            # x - images, y - labels/emotions
            x, y, currBatchSize = trainingValues.getNextMinibatch(minibatch_size)
            trainer.train_minibatch({input_var: x, label_var: y})
            # Accumulate the sample-weighted error so uneven batches average correctly.
            training_accuracy += trainer.previous_minibatch_evaluation_average * currBatchSize

        training_accuracy /= trainingValues.getLengthOfData()
        # The metric is an error rate; convert it to accuracy.
        training_accuracy = 1.0 - training_accuracy

        print("Epoch took:", time.time() - start_time, "seconds")
        print("training accuracy:\t\t{:.2f}%".format(training_accuracy * 100))

    # Save the fine-tuned model.
    z.save("../vgg13.model")
def ffnet(learner, trainer=None):
    """Train a small feed-forward classifier for 100 minibatches.

    Args:
        learner: Callable taking the model parameters and returning a learner;
            only used when ``trainer`` is ``None``.
        trainer: Optional existing trainer to continue training. Its loss
            function's first two arguments are used as feature and label inputs.

    Returns:
        ``(last_avg_error, avg_error, trainer)`` where ``last_avg_error`` is
        the accumulated loss divided by ``trainer.total_number_of_samples_seen``
        and ``avg_error`` is the result of one ``test_minibatch`` call.
    """
    inputs, outputs = 5, 3
    hidden_dimension = 3

    if trainer is not None:
        # Reuse the input variables the existing trainer was built with.
        loss_args = trainer.loss_function.arguments
        features = loss_args[0]
        label = loss_args[1]
    else:
        features = C.input_variable((inputs), np.float32)
        label = C.input_variable((outputs), np.float32)

        hidden = Dense(hidden_dimension, activation=C.sigmoid,
                       init=C.glorot_uniform(seed=98052))
        projection = Dense(outputs, init=C.glorot_uniform(seed=98052))
        z = Sequential([hidden, projection])(features)

        ce = C.cross_entropy_with_softmax(z, label)
        pe = C.classification_error(z, label)

        progress_printer = ProgressPrinter(0)
        trainer = C.Trainer(z, (ce, pe), [learner(z.parameters)], [progress_printer])

    minibatch_size = 25
    aggregate_loss = 0.0
    for _ in range(100):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        trainer.train_minibatch({features: train_features, label: labels})
        # Weight each minibatch's mean loss by the samples it contained.
        aggregate_loss += (trainer.previous_minibatch_loss_average
                           * trainer.previous_minibatch_sample_count)

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error, trainer
def __getTrainer(self, _learning_rate=0.03):
    """Return an SGD trainer for the stored network.

    Loss is softmax cross-entropy and the metric is classification error,
    both computed against ``self.__output_shape``.

    Args:
        _learning_rate: Per-minibatch learning rate for the SGD learner.
    """
    criterion = (
        cntk.cross_entropy_with_softmax(self.__neural_network, self.__output_shape),
        cntk.classification_error(self.__neural_network, self.__output_shape),
    )
    schedule = cntk.learning_rate_schedule(_learning_rate, cntk.UnitType.minibatch)
    sgd_learner = cntk.sgd(self.__neural_network.parameters, schedule)
    return cntk.Trainer(self.__neural_network, criterion, [sgd_learner])
def train_eval_mnist_onelayer_from_file(criterion_name=None, eval_name=None):
    """Train and test a one-hidden-layer MNIST network read from text files.

    Args:
        criterion_name: Name assigned to the cross-entropy criterion node.
        eval_name: Name assigned to the square-error evaluation node.

    Returns:
        Whatever ``ctx.test`` returns for the criterion and evaluation nodes.
    """
    feat_dim, label_dim, hidden_dim = 784, 10, 200

    data_dir = os.path.join(os.path.dirname(__file__), "Data")
    training_filename = os.path.join(data_dir, "Train-28x28_text.txt")
    test_filename = os.path.join(data_dir, "Test-28x28_text.txt")

    # Network definition
    features = C.input(feat_dim)
    features.name = 'features'
    # Scale the features by 0.00390625 (= 1/256).
    feats_scaled = C.element_times(features, C.constant(0.00390625))

    labels = C.input(label_dim)
    labels.tag = 'label'
    labels.name = 'labels'

    train_reader = C.CNTKTextFormatReader(training_filename)
    test_reader = C.CNTKTextFormatReader(test_filename)

    h1 = add_dnn_sigmoid_layer(feat_dim, hidden_dim, feats_scaled, 1)
    out = add_dnn_layer(hidden_dim, label_dim, h1, 1)
    out.tag = 'output'

    criterion = C.cross_entropy_with_softmax(labels, out)
    criterion.name = criterion_name
    criterion.tag = 'criterion'

    eval_node = C.ops.square_error(labels, out)
    eval_node.name = eval_name
    eval_node.tag = 'eval'

    def bind_streams(reader):
        # Map the reader's streams onto the label and feature inputs.
        return (reader.map(labels, alias='labels', dim=label_dim)
                .map(features, alias='features', dim=feat_dim))

    # Training parameters (settings are scaled down).
    my_sgd = C.SGDParams(epoch_size=600, minibatch_size=32,
                         learning_rates_per_mb=0.1, max_epochs=5,
                         momentum_per_mb=0)

    roots = [criterion, eval_node]
    with C.LocalExecutionContext('mnist_one_layer', clean_up=True) as ctx:
        ctx.train(root_nodes=roots, training_params=my_sgd,
                  input_map=bind_streams(train_reader))
        return ctx.test(root_nodes=roots, input_map=bind_streams(test_reader))
def test_focal_loss_image():
    """Focal loss with ``alpha=1, gamma=0`` matches cross-entropy along axis 0."""
    shape = (3, 1, 2)
    output = C.input_variable(shape)
    target = C.input_variable(shape)

    scores = np.random.random(shape).astype(np.float32)
    one_hot = np.array([[[0, 1]], [[0, 0]], [[1, 0]]], dtype=np.float32)

    expected = C.cross_entropy_with_softmax(output, target, axis=0).eval(
        {output: scores, target: one_hot})
    actual = Cx.focal_loss_with_softmax(output, target, alpha=1, gamma=0, axis=0).eval(
        {output: scores, target: one_hot})

    np.testing.assert_almost_equal(expected, actual, decimal=5)
def finalize_network(reader, model_details, max_amount_of_epochs, samples_per_epoch,
                     samples_per_minibatch, pixel_dimensions, classes, learning_rate):
    """Train the transfer-learning model through a training session and save it.

    Args:
        reader: Minibatch source exposing ``streams.features`` and ``streams.labels``.
        model_details: Forwarded to ``create_tf_model``.
        max_amount_of_epochs: Epoch count reported to the progress printer.
        samples_per_epoch: Used as both progress and checkpoint frequency.
        samples_per_minibatch: Minibatch size for the training session.
        pixel_dimensions: Mapping with ``'depth'``, ``'height'`` and ``'width'``.
        classes: Collection whose length is the number of output classes.
        learning_rate: Per-minibatch learning rate.

    Returns:
        Dict with the ``'features'`` and ``'label'`` inputs and ``'model'``
        (the trained model wrapped in ``softmax``).
    """
    num_classes = len(classes)
    image_shape = (pixel_dimensions['depth'],
                   pixel_dimensions['height'],
                   pixel_dimensions['width'])

    features = input_variable(shape=image_shape)
    label = input_variable(shape=num_classes)

    # Scaling the features speeds up training.
    normalized_features = element_times(1.0 / 256.0, features)

    model = create_tf_model(model_details, num_classes=num_classes,
                            input_features=normalized_features, freeze=True)

    loss = cross_entropy_with_softmax(model, label)
    metric = classification_error(model, label)

    schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
    learner = momentum_sgd(parameters=model.parameters, lr=schedule,
                           momentum=0.9, l2_regularization_weight=0.0005)
    reporter = ProgressPrinter(tag='training', num_epochs=max_amount_of_epochs)
    trainer = Trainer(model=model, criterion=(loss, metric),
                      parameter_learners=[learner], progress_writers=[reporter])

    log_number_of_parameters(model)

    stream_map = {features: reader.streams.features,
                  label: reader.streams.labels}
    checkpoints = CheckpointConfig(
        frequency=samples_per_epoch,
        filename=os.path.join("./checkpoints", "ConvNet_Lego_VisiOn"),
        restore=True)

    session = training_session(trainer=trainer, mb_source=reader,
                               model_inputs_to_streams=stream_map,
                               mb_size=samples_per_minibatch,
                               progress_frequency=samples_per_epoch,
                               checkpoint_config=checkpoints)
    session.train()

    network = {'features': features, 'label': label, 'model': softmax(model)}

    export_path = os.path.abspath(os.path.join(
        "..", "..", "Final models", "CNN", "CNN-3200-224-resnet-18.model"))
    model.save(export_path)
    return network
def test_debug_multi_output():
    """Check what ``debug_model`` writes for a multi-output ``combine``.

    The debugger input stream supplies five ``'n'`` commands; the captured
    output is expected to hold a forward header, three node lines, a backward
    header and three more node lines.
    """
    input_dim = 2
    num_output_classes = 2

    f_input = input_variable(input_dim, np.float32, needs_gradient=True,
                             name='features')
    p = parameter(shape=(input_dim,), init=10, name='p')
    comb = combine([f_input, p])

    ins = InStream(['n'] * 5)
    outs = OutStream()

    z = times(comb.outputs[0], comb.outputs[1], name='z')
    z = debug_model(z, ins, outs)

    l_input = input_variable(num_output_classes, np.float32, name='labels')
    loss = cross_entropy_with_softmax(z, l_input)
    eval_error = classification_error(z, l_input)

    _train(z, loss, eval_error,
           loss.find_by_name('features'),
           loss.find_by_name('labels'),
           num_output_classes, 1)

    # outs.written contains something like
    # =================================== forward ===================================
    # Parameter('p', [], [2]) with uid 'Parameter4'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # =================================== backward ===================================
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Parameter('p', [], [2]) with uid 'Parameter4'

    # NOTE(review): ``out_stuff`` is not defined in this function; presumably a
    # module-level expectation -- confirm it exists.
    assert outs.written == out_stuff
    written = outs.written
    assert len(written) == 8

    param_prefix = "Parameter('p', "
    input_prefix = "Input('features'"
    times_prefix = 'Times: '

    forward_header = written[0]
    assert forward_header.startswith('=') and 'forward' in forward_header
    line_1, line_2, line_3 = written[1:4]

    backward_header = written[4]
    assert backward_header.startswith('=') and 'backward' in backward_header
    line_5, line_6, line_7 = written[5:8]

    assert line_5.startswith(times_prefix)
    # The parameter and input lines may appear in either order.
    assert (line_6.startswith(param_prefix) and line_7.startswith(input_prefix)) or \
        (line_6.startswith(input_prefix) and line_7.startswith(param_prefix))
def test_factor_dense_for_prediction():
    """A factored dense model keeps at least half the original's label match.

    Trains a small dense classifier, factors it with ``nc.factor_dense`` and
    compares both models' predictions on ten fresh samples.
    """
    input_dim = 2
    num_output_classes = 2
    hidden_layer_dim = 50
    num_minibatches_to_train = 2000
    minibatch_size = 25

    feature_var = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    z = _create_model_dense(feature_var, input_dim, hidden_layer_dim, num_output_classes)
    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    # Plain SGD with a per-minibatch learning rate of 0.5.
    schedule = C.learning_rate_schedule(0.5, C.UnitType.minibatch)
    trainer = C.Trainer(z, (loss, eval_error), [C.sgd(z.parameters, schedule)])

    for _ in range(0, int(num_minibatches_to_train)):
        features, labels = _generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)
        trainer.train_minibatch({feature_var: features, label: labels})

    # Fresh data to predict on.
    features, labels = _generate_random_data_sample(10, 2, 2)

    # Factor the model.
    newz = nc.factor_dense(z, projection_function=_get_rank_reduced_size,
                           filter_function=_filter)

    original_out = C.softmax(z)
    factored_out = C.softmax(newz)

    original_labels_probs = original_out.eval({feature_var: features})
    predicted_label_probs = factored_out.eval({feature_var: features})

    original_match = _percentage_match(labels, original_labels_probs)
    factored_match = _percentage_match(labels, predicted_label_probs)

    # The reduced model should match at least 50% as well as the original.
    # Training is kept short for the test, so the match is lower than usual.
    assert original_match * 0.5 <= factored_match
def create_resnet_network(network_name, fp16):
    """Build a ResNet graph together with its loss and error nodes.

    Args:
        network_name: One of ``'resnet18'``, ``'resnet34'``, ``'resnet50'``,
            ``'resnet101'`` or ``'resnet152'``.
        fp16: When true, the graph is built in float16: inputs are cast down
            and the loss/error outputs are cast back to float32.

    Returns:
        Dict with keys ``'name'``, ``'feature'``, ``'label'``, ``'ce'``,
        ``'errs'``, ``'top5Errs'`` and ``'output'``.

    Raises:
        RuntimeError: If ``network_name`` is not a known model name. (The
            previous code *returned* the exception object, which callers would
            then treat as a network dict.)
    """
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        stride1x1 = (1, 1)
        stride3x3 = (2, 2)

        # create model, and configure learning parameters
        if network_name == 'resnet18':
            z = create_imagenet_model_basic(graph_input, [2, 1, 1, 2], num_classes)
        elif network_name == 'resnet34':
            z = create_imagenet_model_basic(graph_input, [3, 3, 5, 2], num_classes)
        elif network_name == 'resnet50':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 5, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet101':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 22, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet152':
            z = create_imagenet_model_bottleneck(graph_input, [2, 7, 35, 2], num_classes, stride1x1, stride3x3)
        else:
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        errs = classification_error(z, graph_label, topN=1)
        top5Errs = classification_error(z, graph_label, topN=5)

    if fp16:
        # Report loss and error metrics in float32 even for a float16 graph.
        ce = C.cast(ce, dtype=np.float32)
        errs = C.cast(errs, dtype=np.float32)
        top5Errs = C.cast(top5Errs, dtype=np.float32)

    return {
        'name': network_name,
        'feature': input_var,
        'label': label_var,
        'ce': ce,
        'errs': errs,
        'top5Errs': top5Errs,
        'output': z
    }
def test_to_sequence_backprop(device_id):
    """Loss and gradients agree between a sequence input and ``to_sequence``.

    The model is evaluated once with a sparse sequence input and once through
    a clone whose input is a non-sequence tensor converted by
    ``C.to_sequence``; losses and dense parameter gradients must be exactly
    equal.
    """
    dev = cntk_device(device_id)
    input_vocab_size=3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    x_seq_input = C.sequence.input_variable(input_vocab_size, is_sparse=True, name='features')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    label_seq_input = C.sequence.input_variable(num_labels, is_sparse=True, name='labels')
    ce = C.cross_entropy_with_softmax(z, label_seq_input)

    # Two sequences of lengths 3 and 2, with one label row per step.
    seq1_data = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    seq2_data = [[0, 0, 1], [0, 1, 1]]
    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0], [0, 1]]
    label_seq_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data)]
    # Reference run: gradients and loss from the native sequence input.
    param_grads_1, loss_result_1 = ce.grad({x_seq_input : [_to_csr(seq1_data), _to_csr(seq2_data)], label_seq_input : label_seq_data},
                                           wrt=ce.parameters, outputs=[ce], as_numpy=False)

    # Create a clone of the model that uses a non-sequence input
    # and converts it to a sequence using to_sequence
    x_non_seq_input = C.input_variable((C.FreeDimension, input_vocab_size), is_sparse=True, name='non_seq_features')
    x_seq_lens = C.input_variable((), name='sequence_lengths')
    x_seq = C.to_sequence(x_non_seq_input, x_seq_lens)
    # Multiply by an identity matrix and reconcile the dynamic axes with the labels' axes.
    x_seq = C.reconcile_dynamic_axes(C.times(x_seq, np.eye(input_vocab_size, dtype=np.float32)), label_seq_input)
    ce_clone = ce.clone('share', {x_seq_input : x_seq})

    # The shorter sequence is padded with an all-zero row so both fit shape (2, 3, 3).
    x_non_seq_data = C.NDArrayView.from_csr(_to_csr([seq1_data, seq2_data + [[0, 0, 0]]]), shape=(2, 3, 3))
    x_seq_lens_data = np.asarray([3, 2], dtype=np.float32)

    # Look the inputs up again by name on the clone's own argument list.
    x_non_seq_input = next(argument for argument in ce_clone.arguments if argument.name == 'non_seq_features')
    label_seq_input = next(argument for argument in ce_clone.arguments if argument.name == 'labels')
    x_seq_lens = next(argument for argument in ce_clone.arguments if argument.name == 'sequence_lengths')
    param_grads_2, loss_result_2 = ce_clone.grad({x_non_seq_input : x_non_seq_data, x_seq_lens : x_seq_lens_data, label_seq_input : label_seq_data},
                                                 wrt=ce_clone.parameters, outputs=[ce_clone], as_numpy=False)

    assert np.array_equal(loss_result_1.as_sequences()[0], loss_result_2.as_sequences()[0])
    assert np.array_equal(loss_result_1.as_sequences()[1], loss_result_2.as_sequences()[1])

    # Only dense gradients are compared; sparse ones are skipped.
    for param in param_grads_1:
        if not param_grads_1[param].is_sparse:
            reference_grad_value = param_grads_1[param].asarray()
            grad_value = param_grads_2[param].asarray()
            assert np.array_equal(reference_grad_value, grad_value)
def test_htk_deserializers():
    """Smoke test: train three minibatches from HTK feature/MLF deserializers.

    The test changes into ``data_path`` because the data files are referenced
    by relative name, and always changes back to ``abs_path`` afterwards --
    including when the body raises, so a failure here cannot leave later tests
    running in the wrong directory.
    """
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    os.chdir(data_path)
    try:
        fd = HTKFeatureDeserializer(StreamDefs(
            amazing_features=StreamDef(shape=feature_dim, context=(context, context), scp=features_file)))

        ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

        reader = MinibatchSource([fd, ld])

        # Each frame is presented with ``context`` frames on either side.
        features = C.input_variable(((2 * context + 1) * feature_dim))
        labels = C.input_variable((num_classes))

        model = Sequential([For(range(3), lambda: Recurrence(LSTM(256))),
                            Dense(num_classes)])
        z = model(features)
        ce = C.cross_entropy_with_softmax(z, labels)
        errs = C.classification_error(z, labels)

        learner = C.adam_sgd(z.parameters,
                             lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                             momentum=C.momentum_as_time_constant_schedule(1000),
                             low_memory=True,
                             gradient_clipping_threshold_per_sample=15,
                             gradient_clipping_with_truncation=True)
        trainer = C.Trainer(z, (ce, errs), learner)

        input_map = {
            features: reader.streams.amazing_features,
            labels: reader.streams.awesome_labels
        }

        pp = C.ProgressPrinter(freq=0)
        # just run and verify it doesn't crash
        for i in range(3):
            mb_data = reader.next_minibatch(mbsize, input_map=input_map)
            trainer.train_minibatch(mb_data)
            pp.update_with_trainer(trainer, with_metric=True)
    finally:
        os.chdir(abs_path)
def train(nonlinearity, num_hidden_layers, device_id, minibatch_size=10, num_samples=1000):
    """Run SGD training of a fully connected classifier and collect metrics.

    Args:
        nonlinearity: Forwarded to ``fully_connected_classifier_net``.
        num_hidden_layers: Forwarded to ``fully_connected_classifier_net``.
        device_id: Resolved through ``cntk_device``; set as default device and
            passed to every ``train_minibatch`` call.
        minibatch_size: Samples generated per training step.
        num_samples: Sample budget; the step count is
            ``int(num_samples / minibatch_size)``.

    Returns:
        Two lists ``(losses, errors)`` holding the values returned by
        ``print_training_progress`` whenever neither is ``"NA"``.
    """
    from cntk.cntk_py import always_allow_setting_default_device

    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    # Seed NumPy so the generated data is repeatable.
    np.random.seed(0)

    lr_schedule = C.learning_rate_schedule(0.5, C.UnitType.minibatch)

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, 50,
                                       num_hidden_layers, nonlinearity)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (ce, pe), [learner])

    output_freq = 20
    losses = []
    errors = []

    for mb_index in range(int(num_samples / minibatch_size)):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)

        # Map the model's input variables to this minibatch's data.
        feed = {inp: features, label: labels}
        trainer.train_minibatch(feed, device=cntk_device(device_id))

        progress = print_training_progress(trainer, mb_index, output_freq)
        _batchsize, mb_loss, mb_error = progress
        if mb_loss == "NA" or mb_error == "NA":
            continue
        losses.append(mb_loss)
        errors.append(mb_error)

    return losses, errors
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    """Run a training session fed by user-defined minibatch sources.

    Args:
        tmpdir: Fixture argument; not used in the body.
        with_checkpoint_impl: Selects ``MyDataSourceWithCheckpoint`` for the
            cross-validation source so its checkpoint restore can be checked.
    """
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # The cross-validation source optionally exercises UserMinibatchSource checkpointing.
    MBS_CV_CLASS = MyDataSourceWithCheckpoint if with_checkpoint_impl else MyDataSource
    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import (sequence, parameter, plus, cross_entropy_with_softmax,
                      classification_error, learning_parameter_schedule_per_sample,
                      sgd, Trainer, training_session, times)

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    # A large learning rate keeps the model from converging before all the
    # intended samples are fed; a training session can end early when there
    # are no more updates.
    lr_per_sample = learning_parameter_schedule_per_sample(0.3)
    trainer = Trainer(z, (ce, errs), [sgd(z.parameters, lr_per_sample)])

    cv_config = C.CrossValidationConfig(minibatch_source=mbs_cv,
                                        max_samples=10,
                                        minibatch_size=2)
    session = training_session(
        trainer=trainer,
        mb_source=mbs,
        model_inputs_to_streams={feature: mbs.fsi, label: mbs.lsi},
        mb_size=4,
        max_samples=20,
        cv_config=cv_config,
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
def train_sequence_classifier():
    """Train the LSTM sequence classifier for 255 minibatches.

    Returns:
        ``(evaluation_average, loss_average)`` of the last minibatch, as floats.
    """
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Sparse sequence features and a per-sequence label.
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    classifier_output = LSTM_sequence_classifier_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    # The training data lives relative to this file's directory.
    here = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(
        here,
        "../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf")

    reader = create_reader(path, True, input_dim, num_output_classes)
    input_map = {features: reader.streams.features,
                 label: reader.streams.labels}

    lr_per_sample = learning_parameter_schedule_per_sample(0.0005)

    progress_printer = ProgressPrinter(0)
    learner = sgd(classifier_output.parameters, lr=lr_per_sample)
    trainer = Trainer(classifier_output, (ce, pe), learner, progress_printer)

    minibatch_size = 200
    for _ in range(255):
        trainer.train_minibatch(
            reader.next_minibatch(minibatch_size, input_map=input_map))

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
def create_model(self, frame_mode=False):
    """Create inputs, network output, loss and error nodes on ``self``.

    Args:
        frame_mode: When true, use plain inputs and a two-layer dense network;
            otherwise use sequence inputs and a recurrent LSTM network.

    Sets ``self.feat``, ``self.label``, ``self.output``, ``self.ce`` and
    ``self.err``.
    """
    make_input = cntk.input_variable if frame_mode else cntk.sequence.input_variable
    self.feat = make_input(shape=(feat_dim,))
    self.label = make_input((label_dim,))

    if frame_mode:
        stack = [cntk.layers.Dense(cell_dim), cntk.layers.Dense(label_dim)]
    else:
        # sequence mode
        lstm = cntk.layers.LSTM(shape=label_dim, cell_shape=(cell_dim,))
        stack = [cntk.layers.Recurrence(lstm)]

    net = cntk.layers.Sequential(stack)
    self.output = net(self.feat)

    self.ce = cntk.cross_entropy_with_softmax(self.output, self.label)
    self.err = cntk.classification_error(self.output, self.label)
def ffnet(learner, trainer=None):
    """Run 100 training minibatches on a tiny feed-forward classifier.

    Args:
        learner: Factory called with the model parameters to create the
            learner; ignored when an existing ``trainer`` is supplied.
        trainer: Optional trainer to resume. The first two arguments of its
            loss function serve as the feature and label inputs.

    Returns:
        ``(last_avg_error, avg_error, trainer)``: the sample-weighted loss
        divided by ``trainer.total_number_of_samples_seen``, the result of a
        single ``test_minibatch`` call, and the trainer itself.
    """
    inputs = 5
    outputs = 3
    hidden_dimension = 3

    if trainer is None:
        # Fresh model: create the input variables and the network.
        features = C.input_variable((inputs), np.float32)
        label = C.input_variable((outputs), np.float32)

        model_fn = Sequential([
            Dense(hidden_dimension, activation=C.sigmoid,
                  init=C.glorot_uniform(seed=98052)),
            Dense(outputs, init=C.glorot_uniform(seed=98052)),
        ])
        z = model_fn(features)

        criterion = (C.cross_entropy_with_softmax(z, label),
                     C.classification_error(z, label))

        printer = ProgressPrinter(0)
        trainer = C.Trainer(z, criterion, [learner(z.parameters)], [printer])
    else:
        existing_args = trainer.loss_function.arguments
        features = existing_args[0]
        label = existing_args[1]

    minibatch_size = 25
    num_minibatches_to_train = 100

    aggregate_loss = 0.0
    for _step in range(num_minibatches_to_train):
        batch_x, batch_y = generate_random_data(minibatch_size, inputs, outputs)
        trainer.train_minibatch({features: batch_x, label: batch_y})
        n_samples = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * n_samples

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    held_out_x, held_out_y = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features: held_out_x, label: held_out_y})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))

    return last_avg_error, avg_error, trainer
def train_eval_logistic_regression_from_file(criterion_name=None, eval_name=None,
                                             device_id=-1):
    """Train and test a 3-class logistic regression from text files.

    Args:
        criterion_name: Name assigned to the cross-entropy criterion node.
        eval_name: Name assigned to the square-error evaluation node.
        device_id: Assigned to the execution context's ``device_id``.

    Returns:
        Whatever ``ctx.test`` returns for the criterion and evaluation nodes.
    """
    feature_dim, class_count = 2, 3

    # Using data from https://github.com/Microsoft/CNTK/wiki/Tutorial
    cur_dir = os.path.dirname(__file__)
    train_file = os.path.join(cur_dir, "Train-3Classes.txt")
    test_file = os.path.join(cur_dir, "Test-3Classes.txt")

    X = C.input(feature_dim)
    y = C.input(class_count)

    # One weight matrix and a bias, both initialised to zero.
    W = C.parameter(value=np.zeros(shape=(class_count, feature_dim)))
    b = C.parameter(value=np.zeros(shape=(class_count, 1)))

    out = C.times(W, X) + b
    out.tag = 'output'

    ce = C.cross_entropy_with_softmax(y, out)
    ce.name = criterion_name
    ce.tag = 'criterion'

    eval_node = C.ops.square_error(y, out)
    eval_node.tag = 'eval'
    eval_node.name = eval_name

    # Training and testing data readers.
    train_reader = C.CNTKTextFormatReader(train_file, randomize=None)
    test_reader = C.CNTKTextFormatReader(test_file, randomize=None)

    def bind_streams(reader):
        # 'I' is the feature stream alias, 'L' the label stream alias.
        return (reader.map(X, alias='I', dim=feature_dim)
                .map(y, alias='L', dim=class_count))

    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=25,
                         learning_rates_per_mb=0.1, max_epochs=3)

    roots = [ce, eval_node]
    with C.LocalExecutionContext('logreg') as ctx:
        ctx.device_id = device_id
        ctx.train(root_nodes=roots, training_params=my_sgd,
                  input_map=bind_streams(train_reader))
        return ctx.test(root_nodes=roots, input_map=bind_streams(test_reader))
def test_lstm_over_lstm_thought_vectors_2(device_id):
    """Check the loss of a two-level LSTM over conversations of utterances.

    An utterance-level LSTM is reduced to its last state per utterance, the
    result is regrouped via ``UtteranceBatchReshape`` and converted to a
    sequence shaped like the labels, and a second LSTM plus dense layer
    produces per-step predictions. The per-step losses are compared against
    fixed reference values with an absolute tolerance of 0.01.
    """
    dev = cntk_device(device_id)
    input_vocab_size=3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    utterances_input = C.sequence.input_variable((input_vocab_size), is_sparse=True, name='utterances')
    conversation_lengths_input = C.input_variable((), name='conversation_sequence_lengths')
    # Labels live on their own named sequence axis, separate from the utterance axis.
    label_input = C.sequence.input_variable(num_labels, is_sparse=True, sequence_axis=C.Axis('label_sequence'), name='labels')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(utterances_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        # Keep only the final LSTM state of each utterance.
        model = C.sequence.last(model)
        # NOTE(review): presumably regroups utterance vectors per conversation
        # using the supplied lengths -- confirm against UtteranceBatchReshape.
        model = C.user_function(UtteranceBatchReshape(model, conversation_lengths_input))
        model = C.to_sequence_like(model, label_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    ce = C.cross_entropy_with_softmax(z, label_input)

    # Placeholder utterance used to pad conversations shorter than three utterances.
    sentinel_utt_data = C.NDArrayView.from_csr(_to_csr([[0, 0, 1]]), device=C.cpu())
    c1_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1], [0, 1, 0], [1, 0, 0]]), device=C.cpu())
    c1_utt2_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0], [0, 1, 1]]), device=C.cpu())
    c1_utt3_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1], [0, 1, 0]]), device=C.cpu())
    c2_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1]]), device=C.cpu())
    c3_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0], [0, 1, 1], [1, 0, 0]]), device=C.cpu())
    c3_utt2_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0]]), device=C.cpu())

    # Nine utterances in total: three slots per conversation, padded with the sentinel.
    all_utt_data = C.Value.create(C.sequence.input_variable((input_vocab_size), is_sparse=True), [c1_utt1_data, c1_utt2_data, c1_utt3_data, c2_utt1_data, sentinel_utt_data, sentinel_utt_data, c3_utt1_data, c3_utt2_data, sentinel_utt_data], device=C.cpu()).data
    # Number of real (non-sentinel) utterances in each of the three conversations.
    conversation_lengths_data = np.asarray([3, 1, 2], dtype=np.float32)
    # One label row per real utterance of each conversation.
    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0]]
    seq3_label_data = [[1, 0], [0, 1]]
    label_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data), _to_csr(seq3_label_data)]
    param_grads, loss_result = ce.grad({utterances_input : all_utt_data, label_input : label_data, conversation_lengths_input : conversation_lengths_data},
                                       wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    absolute_tolerance = 0.01
    assert np.allclose(loss_result[0], [[0.678914], [0.668076], [0.728129]], atol=absolute_tolerance)
    assert np.allclose(loss_result[1], [[0.679029]], atol=absolute_tolerance)
    assert np.allclose(loss_result[2], [[0.705393], [0.674243]], atol=absolute_tolerance)
def create_binary_convolution_model():
    """Build the binary-convolution classifier with its loss and error nodes.

    Returns:
        ``C.combine([z, ce, pe])``: the network output, the cross-entropy loss
        (including the binary regularisation term) and the classification error.
    """
    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # Scale the input by 0.00390625 (= 1/256).
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # The first layer is a regular full-precision convolution.
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)

    # Each stage: max-pool, batch-norm, then a binary convolution described by
    # (filter shape, output filters, input channels).
    binary_stages = (
        ((3, 3), 128, 32),
        ((3, 3), 128, 128),
        ((1, 1), num_classes, 128),
    )
    for filter_shape, out_filters, in_channels in binary_stages:
        z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
        z = C.layers.BatchNormalization(map_rank=1)(z)
        z = BinaryConvolution(z, filter_shape, out_filters, channels=in_channels, pad=True)

    # Average over the remaining spatial extent and flatten to one value per class.
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Binary regularization (ala Gang Hua): sum of (1 - w^2) over every
    # parameter named "filter".
    weight_sum = C.constant(0)
    for filt in (p for p in z.parameters if p.name == "filter"):
        penalty = C.reduce_sum(C.minus(1, C.square(filt)))
        weight_sum = C.plus(weight_sum, penalty)
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, apply a learnable scale.
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.plus(C.cross_entropy_with_softmax(z, label_var), bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
def create_recurrent_network():
    """Build a three-layer LSTM network with its loss and error nodes.

    Returns:
        Dict with keys ``'feature'``, ``'label'``, ``'ce'``, ``'errs'`` and
        ``'output'``.
    """
    # Sequence inputs: features span the frame plus ``context`` frames per side.
    window = 2 * context + 1
    features = sequence.input_variable((window * feature_dim))
    labels = sequence.input_variable((num_classes))

    # Three stacked LSTM recurrences followed by a dense projection.
    recurrent_stack = For(range(3), lambda: Recurrence(LSTM(256)))
    z = Sequential([recurrent_stack, Dense(num_classes)])(features)

    network = {'feature': features, 'label': labels}
    network['ce'] = cross_entropy_with_softmax(z, labels)
    network['errs'] = classification_error(z, labels)
    network['output'] = z
    return network
def ffnet(optimizer, num_minibatches_to_train, learning_rate_func, lr_args, learner_kwargs):
    """Train a small feed-forward classifier with a configurable learner.

    Args:
        optimizer: Learner factory called with the parameters and the schedule.
        num_minibatches_to_train: Number of training steps.
        learning_rate_func: Called as ``learning_rate_func(0.125, *lr_args)``
            to build the learning-rate schedule.
        lr_args: Extra positional arguments for ``learning_rate_func``.
        learner_kwargs: Extra keyword arguments, applied only when
            ``optimizer`` is ``sgd``.

    Returns:
        The trained model's parameters.
    """
    inputs, outputs = 2, 2
    hidden_dimension = 50

    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    hidden = Dense(hidden_dimension, activation=C.sigmoid,
                   init=C.glorot_uniform(seed=SEED))
    projection = Dense(outputs, init=C.glorot_uniform(seed=SEED))
    z = Sequential([hidden, projection])(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    lr = learning_rate_func(0.125, *lr_args)
    progress_printer = ProgressPrinter(0)

    # Only plain SGD receives the additional learner keyword arguments.
    if optimizer == sgd:
        learner = sgd(z.parameters, lr, **learner_kwargs)
    else:
        learner = optimizer(z.parameters, lr)

    trainer = C.Trainer(z, (ce, pe), [learner], progress_printer)

    minibatch_size = 25
    for _ in range(num_minibatches_to_train):
        batch_x, batch_y = generate_random_data(minibatch_size, inputs, outputs)
        trainer.train_minibatch({features: batch_x, label: batch_y})

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))

    return z.parameters
def ffnet():
    """Train a two-layer feed-forward classifier for 1024 minibatches.

    Returns:
        ``(last_avg_error, avg_error)``: the sample-weighted loss divided by
        ``trainer.total_number_of_samples_seen`` and the result of one
        ``test_minibatch`` call.
    """
    inputs, outputs = 2, 2
    hidden_dimension = 50

    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    classifier = Sequential([
        Dense(hidden_dimension, activation=C.sigmoid),
        Dense(outputs),
    ])
    z = classifier(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    lr_per_minibatch = C.learning_parameter_schedule(0.125)
    progress_printer = ProgressPrinter(0)
    learner = sgd(z.parameters, lr=lr_per_minibatch)
    trainer = C.Trainer(z, (ce, pe), [learner], [progress_printer])

    minibatch_size = 25
    aggregate_loss = 0.0
    for _ in range(1024):
        batch_x, batch_y = generate_random_data(minibatch_size, inputs, outputs)
        trainer.train_minibatch({features: batch_x, label: batch_y})
        # Weight each minibatch's mean loss by its sample count.
        aggregate_loss += (trainer.previous_minibatch_loss_average
                           * trainer.previous_minibatch_sample_count)

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))

    return last_avg_error, avg_error
def train_eval_logistic_regression_with_numpy(criterion_name=None, eval_name=None, device_id=-1):
    """Train and test a logistic-regression model on synthetic NumPy data.

    Training and test sets come from ``synthetic_data`` using the
    module-level ``train_N``, ``test_N``, ``feature_dim`` and ``num_classes``.
    The model is a single zero-initialised weight tensor plus bias; a
    cross-entropy-with-softmax node is tagged as the criterion and a
    square-error node as the evaluation node.

    Args:
        criterion_name: name assigned to the criterion node.
        eval_name: name assigned to the evaluation node.
        device_id: value assigned to the execution context's ``device_id``.

    Returns:
        Whatever ``ctx.test`` returns for the two root nodes.
    """
    # Fixed seed so the run is reproducible in tests.
    np.random.seed(1)

    train_X, train_y = synthetic_data(train_N, feature_dim, num_classes)
    test_X, test_y = synthetic_data(test_N, feature_dim, num_classes)

    # Set up the training data for CNTK. Before the CNTK configuration is
    # written, the data will be attached to X.reader.batch and y.reader.batch
    # and then serialized.
    X = C.input_numpy(train_X)
    y = C.input_numpy(train_y)

    # The network: one weight tensor and a bias, both starting at zero.
    weights = C.parameter(value=np.zeros(shape=(num_classes, feature_dim)))
    bias = C.parameter(value=np.zeros(shape=(num_classes, 1)))
    out = C.times(weights, X) + bias

    criterion = C.cross_entropy_with_softmax(y, out)
    criterion.tag = 'criterion'
    criterion.name = criterion_name

    eval_node = C.ops.cntk1.SquareError(y, out)
    eval_node.tag = 'eval'
    eval_node.name = eval_name

    sgd_params = C.SGDParams(
        epoch_size=0,
        minibatch_size=25,
        learning_rates_per_mb=0.1,
        max_epochs=3,
    )

    with C.LocalExecutionContext('logreg', clean_up=True) as ctx:
        ctx.device_id = device_id
        ctx.train(root_nodes=[criterion, eval_node], training_params=sgd_params)

        # For testing, swap the test data onto the existing input nodes.
        X.reader.batch = test_X
        y.reader.batch = test_y
        result = ctx.test(root_nodes=[criterion, eval_node])
        return result
def test_udf_checkpointing(tmpdir):
    """Check that a trainer checkpoint for the test function is non-empty.

    Builds the function returned by ``build_test_function``, trains it for a
    single minibatch of random data, saves a checkpoint with some external
    state into ``tmpdir`` and asserts that the dictionary loaded back from
    that file has at least one key.
    """
    dev, w_value, c1_value, c2_value, op = build_test_function()

    # Constant 2x2 float32 label used by both the loss and the metric.
    label = C.constant(np.asarray([[1, 2], [3, 4]]).astype(np.float32))
    criterion = C.cross_entropy_with_softmax(op, label)
    metric = C.classification_error(op, label)

    schedule = C.learning_rate_schedule(0.5, C.UnitType.minibatch)
    trainer = C.Trainer(op, (criterion, metric), [C.sgd(op.parameters, schedule)])

    # One training step on random input so there is state worth saving.
    first_argument = op.arguments[0]
    minibatch = np.random.random((2, 2)).astype(np.float32)
    trainer.train_minibatch({first_argument: minibatch}, device=dev)

    checkpoint_path = str(tmpdir / 'test_checkpointing.out')
    trainer.save_checkpoint(checkpoint_path, external_state={'test': 'test'})

    restored = C.cntk_py.Dictionary.load(checkpoint_path)
    assert len(restored.keys()) > 0
def train_eval_logistic_regression_with_numpy(criterion_name=None, eval_name=None, device_id=-1):
    """Train and test a logistic-regression model on random NumPy data.

    Generates 500 samples with 250 normally distributed features each and a
    two-column label array (a random 0/1 column next to its complement). The
    model is a single zero-initialised weight tensor plus bias; a
    cross-entropy-with-softmax node is tagged as the criterion and a
    square-error node as the evaluation node.

    Args:
        criterion_name: name assigned to the criterion node.
        eval_name: name assigned to the evaluation node.
        device_id: value assigned to the execution context's ``device_id``.

    Returns:
        Whatever ``ctx.test`` returns for the two root nodes.
    """
    # Fixed seed so the run is reproducible in tests.
    np.random.seed(1)

    num_samples = 500
    num_features = 250

    # Synthetic data: Gaussian features, then a 0/1 column and its complement.
    samples = np.random.randn(num_samples, num_features)
    first_column = np.random.randint(size=(num_samples, 1), low=0, high=2)
    targets = np.hstack((first_column, 1 - first_column))

    # Wrap the NumPy arrays as CNTK inputs.
    x = C.input_numpy(samples)
    y = C.input_numpy(targets)

    # The network: one weight tensor and a bias, both starting at zero.
    weights = C.parameter(value=np.zeros(shape=(2, num_features)))
    bias = C.parameter(value=np.zeros(shape=(2, 1)))
    out = C.times(weights, x) + bias

    criterion = C.cross_entropy_with_softmax(y, out)
    criterion.tag = 'criterion'
    criterion.name = criterion_name

    eval_node = C.ops.cntk1.SquareError(y, out)
    eval_node.tag = 'eval'
    eval_node.name = eval_name

    sgd_params = C.SGDParams(
        epoch_size=0,
        minibatch_size=25,
        learning_rates_per_mb=0.1,
        max_epochs=3,
    )

    with C.LocalExecutionContext('logreg') as ctx:
        ctx.device_id = device_id
        ctx.train(root_nodes=[criterion, eval_node], training_params=sgd_params)
        result = ctx.test(root_nodes=[criterion, eval_node])
        return result
def ffnet():
    """Train a small feed-forward classifier with SGD and report its errors.

    The network has a sigmoid hidden layer of width 50 and a linear output
    layer, over 2-dimensional inputs and 2 outputs. It is trained for 1024
    minibatches of 25 samples drawn from ``generate_random_data`` while a
    ``ProgressPrinter`` is updated after every step, and is then tested on
    one additional minibatch, whose error is also printed.

    Returns:
        A tuple ``(last_avg_error, avg_error)``: the value of the printer's
        ``avg_loss_since_start()`` after training, and the value returned by
        ``trainer.test_minibatch`` for the extra minibatch.
    """
    input_dim = 2
    num_classes = 2
    hidden_dim = 50
    batch_size = 25
    total_minibatches = 1024

    # Input variables for the feature vectors and their labels.
    features = C.input_variable(input_dim, np.float32)
    label = C.input_variable(num_classes, np.float32)

    # Feed-forward classification model.
    network = Sequential([
        Dense(hidden_dim, activation=C.sigmoid),
        Dense(num_classes),
    ])
    z = network(features)

    loss = C.cross_entropy_with_softmax(z, label)
    metric = C.classification_error(z, label)

    # Trainer driving plain SGD with a per-minibatch learning rate.
    schedule = learning_rate_schedule(0.125, UnitType.minibatch)
    learner = sgd(z.parameters, lr=schedule)
    trainer = C.Trainer(z, loss, metric, [learner])

    # The printer collects the running loss from the trainer after each step.
    printer = ProgressPrinter(0)
    for _ in range(total_minibatches):
        batch_x, batch_y = generate_random_data(batch_size, input_dim, num_classes)
        trainer.train_minibatch({features: batch_x, label: batch_y})
        printer.update_with_trainer(trainer)

    last_avg_error = printer.avg_loss_since_start()

    # Evaluate once on a minibatch that was not used for training.
    held_out_x, held_out_y = generate_random_data(batch_size, input_dim, num_classes)
    avg_error = trainer.test_minibatch({features: held_out_x, label: held_out_y})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error