def entrenar(checkpoint, entrRuedas, entrOperaciones, input_dim, num_output_classes, testRuedas, testOperaciones):
    """Train the network on the given samples and return its mean test error.

    The model is built by ``crearRed`` and trained with plain SGD for a fixed
    number of epochs (900) using minibatches of 100 samples; the training set
    is reshuffled at the start of every epoch.  When ``checkpoint`` already
    exists on disk, the trainer state is restored from it before training.

    Args:
        checkpoint: Path of the checkpoint file. Training progress is logged
            to ``checkpoint + ".log"``.
        entrRuedas: Training features, indexed by sample along the first axis.
        entrOperaciones: Training labels, aligned with ``entrRuedas``.
        input_dim: Shape passed to the feature input variable.
        num_output_classes: Shape passed to the label input variable.
        testRuedas: Test features, indexed by sample along the first axis.
        testOperaciones: Test labels, aligned with ``testRuedas``.

    Returns:
        The classification error averaged over *all* test samples, or ``0.0``
        when the test set is empty.
    """
    minibatch_size = 100
    epocs = 900
    # Only full minibatches are used for training; because the data is
    # reshuffled every epoch, the leftover samples differ between epochs.
    train_batches = len(entrOperaciones) // minibatch_size

    # Input variables denoting the features and label data
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    netout = crearRed(input_dim, num_output_classes, feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.25, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(log_to_file=checkpoint + ".log", num_epochs=epocs)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Resume from a previous run when a checkpoint is available.
    if os.path.isfile(checkpoint):
        trainer.restore_from_checkpoint(checkpoint)

    npentrRuedas = np.array(entrRuedas).astype(np.float32)
    npentrOperaciones = np.array(entrOperaciones).astype(np.float32)

    # One pass over the (reshuffled) training data per epoch.
    for _ in range(epocs):
        # The same permutation is applied to features and labels so that
        # every sample stays paired with its label.
        p = np.random.permutation(len(entrRuedas))
        npentrOperaciones = npentrOperaciones[p]
        npentrRuedas = npentrRuedas[p]

        # Split the data into minibatches and train on each of them.
        for j in range(train_batches):
            start = j * minibatch_size
            stop = start + minibatch_size
            trainer.train_minibatch({
                feature: npentrRuedas[start:stop],
                label: npentrOperaciones[start:stop],
            })
        trainer.summarize_training_progress()
        # NOTE(review): the original indentation was lost. Saving after every
        # epoch leaves the same final checkpoint as a single save after the
        # loop, and additionally keeps an up-to-date file if the run is
        # interrupted - confirm this matches the intended placement.
        trainer.save_checkpoint(checkpoint)

    # Evaluate on every test sample. The final, possibly shorter, minibatch is
    # included and each minibatch error is weighted by its sample count, so a
    # test set smaller than one minibatch no longer yields a spurious 0.
    total_samples = len(testOperaciones)
    if total_samples == 0:
        return 0.0

    weighted_error = 0.0
    for start in range(0, total_samples, minibatch_size):
        stop = start + minibatch_size
        test_features = np.array(testRuedas[start:stop]).astype(np.float32)
        test_labels = np.array(testOperaciones[start:stop]).astype(np.float32)
        batch_error = trainer.test_minibatch({feature: test_features, label: test_labels})
        weighted_error += batch_error * len(test_labels)
    return weighted_error / total_samples
def ffnet():
    """Train a small feed-forward classifier on generated data.

    Builds a network of two sigmoid hidden layers (50 units each) followed by
    a linear output layer, trains it with SGD on 1024 freshly generated
    minibatches of 25 samples, and evaluates it on one more generated
    minibatch.

    Returns:
        The value returned by ``trainer.test_minibatch`` for that minibatch.
    """
    # Network and training configuration
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50
    minibatch_size = 25
    num_training_minibatches = 1024

    # Input variables denoting the features and label data
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    # Hidden stack first, then the output layer, applied to the feature input
    hidden_stack = For(range(num_hidden_layers),
                       lambda i: Dense(hidden_layers_dim, activation=sigmoid))
    output_layer = Dense(num_output_classes)
    netout = Sequential([hidden_stack, output_layer])(feature)

    # Loss and evaluation criterion
    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(
        netout,
        (ce, pe),
        sgd(netout.parameters, lr=learning_rate_schedule(0.5, UnitType.minibatch)),
        ProgressPrinter(128),
    )

    # Each iteration maps the model's input variables to a new minibatch
    for _ in range(num_training_minibatches):
        batch_features, batch_labels = generate_random_data(
            minibatch_size, input_dim, num_output_classes)
        trainer.train_minibatch({feature: batch_features, label: batch_labels})

    trainer.summarize_training_progress()

    # Evaluate on a single additional generated minibatch
    test_features, test_labels = generate_random_data(
        minibatch_size, input_dim, num_output_classes)
    return trainer.test_minibatch({feature: test_features, label: test_labels})
def _sparse_to_dense_network_cache(input_shape, is_sequence, device):
    """Build a graph that multiplies a sparse input by an identity matrix.

    Args:
        input_shape: Shape of the sparse input variable; its last entry sets
            the size of the identity matrix.
        is_sequence: When true the input is created with ``sequence.input``,
            otherwise with ``input``.
        device: Not referenced in this function body.
            NOTE(review): possibly only relevant to a caching decorator that
            is not visible here - confirm.

    Returns:
        The ``times`` node of the sparse input and ``np.eye(input_shape[-1])``.
    """
    from cntk.ops import times, input, sequence

    # Pick the input factory once, then create the sparse variable with it.
    make_input = sequence.input if is_sequence else input
    sparse_var = make_input(input_shape, is_sparse=True)

    identity = np.eye(input_shape[-1])
    return times(sparse_var, identity)
def train_sequence_classifier(debug_output=False):
    """Train the LSTM sequence classifier and return its last minibatch metrics.

    The model is trained with SGD for 251 minibatches of size 200 read from
    ``Train.ctf`` (path resolved relative to this file).

    Args:
        debug_output: When true, the progress-output frequency passed to
            ``print_training_progress`` is reduced from 10 to 3.

    Returns:
        A tuple ``(evaluation_average, loss_average)`` holding copies of the
        trainer's evaluation and loss averages for the last trained minibatch.
    """
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input(shape=input_dim, is_sparse=True)
    label = input(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels,
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)
    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample))

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10

    if debug_output:
        # Floor division keeps the frequency an integer (3). True division
        # would produce 3.33... on Python 3, which is not a meaningful
        # "every N minibatches" value.
        training_progress_output_freq = training_progress_output_freq // 3

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        print_training_progress(trainer, i, training_progress_output_freq)

    import copy

    # Copy the metrics so the returned values are detached from the trainer.
    evaluation_average = copy.copy(
        trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
def ffnet(data, labels):
    """Train a feed-forward classifier on one stock-data minibatch.

    The network has two sigmoid hidden layers (50 units each) and a 3-way
    linear output over an 800-dimensional input.  A single minibatch obtained
    from ``generate_stock_data`` is reused for all 1024 training steps; the
    evaluation minibatch comes from ``generate_random_data``.

    Args:
        data: Not used by this function.
        labels: Not used by this function; the training labels come from
            ``generate_stock_data``.

    Returns:
        The value returned by ``trainer.test_minibatch`` on the evaluation
        minibatch.
    """
    # Network and training configuration
    input_dim = 800
    num_output_classes = 3
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    # Hidden stack first, then the output layer, applied to the feature input
    hidden_stack = For(range(num_hidden_layers),
                       lambda i: Dense(hidden_layers_dim, activation=sigmoid))
    output_layer = Dense(num_output_classes)
    netout = Sequential([hidden_stack, output_layer])(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    trainer = Trainer(netout, (ce, pe), learner, ProgressPrinter(128))

    # The training minibatch is fetched once and fed on every iteration.
    minibatch_size = 25
    stock_features, stock_labels = generate_stock_data(minibatch_size)
    training_batch = {feature: stock_features, label: stock_labels}
    for _ in range(1024):
        trainer.train_minibatch(training_batch)

    trainer.summarize_training_progress()

    test_features, test_labels = generate_random_data(
        minibatch_size, input_dim, num_output_classes)
    return trainer.test_minibatch({feature: test_features, label: test_labels})
def cargarRedDesdeArchivo(archivo):
    """Rebuild the network and load its state from a checkpoint file.

    A trainer is assembled around a freshly created network only so that
    ``restore_from_checkpoint`` can be called on it; the trainer itself is
    discarded afterwards.

    Args:
        archivo: Path of the checkpoint to restore from.

    Returns:
        The network created by ``crearRed`` after the checkpoint is restored.
    """
    input_dim = 800
    num_output_classes = 3

    # Input variables denoting the features and label data
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    netout = crearRed(input_dim, num_output_classes, feature)

    # Same criteria and learner set-up as used for training
    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)
    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    learner = sgd(netout.parameters, lr=lr_per_minibatch)

    trainer = Trainer(netout, (ce, pe), learner, ProgressPrinter(1))
    trainer.restore_from_checkpoint(archivo)

    return netout
def test_for_constructor_layer(layers_count, dense_units):
    """Check parameter count and output shape of a ``For``-built Dense stack."""
    stack = For(range(layers_count), lambda i: Dense(dense_units))

    # The test expects two parameters per Dense layer.
    assert len(stack.parameters) == 2 * layers_count

    # Applied to a 4-element input, the stack yields a vector of dense_units.
    output = stack(input(4))
    assert output.shape == (dense_units, )
def test_resnet_block(input_data):
    """Check that ``ResNetBlock(square)`` evaluates to ``square(x) + x``."""
    sample = np.asarray(input_data, np.float32)

    block = ResNetBlock(square)(input(len(input_data)))
    actual = block.eval(sample)

    # The evaluated result carries a leading axis of length 1.
    residual_sum = np.square(sample) + sample
    expected = residual_sum.reshape((1, ) + residual_sum.shape)

    np.testing.assert_array_equal(actual, expected)
def test_sequential_constructor(input_data):
    """Check that ``Sequential`` chains abs, sqrt, square and cos in order."""
    x = input(len(input_data))
    sample = np.asarray(input_data, np.float32)

    chained = Sequential([abs, sqrt, square, cos])(x)
    assert chained.shape == x.shape

    actual = chained(sample)

    # Apply the NumPy counterparts in the same order to build the reference.
    expected = sample
    for np_op in (np.abs, np.sqrt, np.square, np.cos):
        expected = np_op(expected)

    np.testing.assert_array_almost_equal(actual[0], expected, decimal=4)
def test_sequential_clique_with_layers(input_elements, expected):
    """Check ``SequentialClique`` built from one Dense layer used three times."""
    x = input(input_elements)
    sample = np.arange(input_elements, dtype=np.float32)

    # A single layer object (no activation, init=1) fills all three slots.
    shared_dense = Dense(input_elements, activation=None, init=1)
    clique = SequentialClique([shared_dense] * 3)(x)
    assert clique.shape == x.shape

    first_result = clique.eval(sample)[0]
    assert first_result.shape == (input_elements, )
    # The smallest distinct output value must match the expected one.
    assert np.unique(first_result)[0] == expected
def test_sequential_clique_with_functions(input_data):
    """Check ``SequentialClique([abs, sqrt, square])`` against a NumPy reference."""
    x = input(len(input_data))
    clique = SequentialClique([abs, sqrt, square])(x)
    assert clique.shape == x.shape

    sample = np.asarray(input_data, np.float32)
    actual = clique.eval(sample)

    # Reference: add the abs output to the input, add sqrt of that running
    # sum to the sum, then square the total.
    after_abs = np.abs(sample) + sample
    after_sqrt = after_abs + np.sqrt(after_abs)
    squared = np.square(after_sqrt)
    # The evaluated result carries a leading axis of length 1.
    expected = squared.reshape((1, ) + squared.shape)

    np.testing.assert_array_almost_equal(actual, expected, decimal=4)
def simple_mnist(tensorboard_logdir=None):
    """Train a fully connected MNIST classifier and return its mean test error.

    The network has one hidden layer of 200 ReLU units on pixel inputs scaled
    by 1/256, and is trained with AdaDelta for 10 sweeps of 60000 samples.

    Args:
        tensorboard_logdir: Optional directory; when given, a
            ``TensorBoardProgressWriter`` is added next to the console
            progress printer.

    Returns:
        The mean of the per-minibatch test errors over the
        ``10000 // 1024 = 9`` full test minibatches that are evaluated.
    """
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = input(input_dim, np.float32)
    label = input(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), feature)  # 1 / 256

    z = fully_connected_classifier_net(
        scaled_input, num_output_classes, hidden_layers_dim,
        num_hidden_layers, relu)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST")

    path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt"))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels,
    }

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    progress_writers = [
        ProgressPrinter(tag='Training', num_epochs=num_sweeps_to_train_with)
    ]

    if tensorboard_logdir is not None:
        progress_writers.append(
            TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(z, (ce, pe), adadelta(z.parameters), progress_writers)

    training_session(
        trainer=trainer,
        mb_source=reader_train,
        mb_size=minibatch_size,
        var_to_stream=input_map,
        max_samples=num_samples_per_sweep * num_sweeps_to_train_with,
        progress_frequency=num_samples_per_sweep,
    ).train()

    # Load test data
    path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt"))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels,
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    # Floor division: the number of evaluated minibatches must be the same
    # integer that later divides the accumulated error. With true division
    # the loop ran 9 times but the sum was divided by 9.765625.
    num_minibatches_to_test = num_samples // test_minibatch_size
    test_result = 0.0
    for _ in range(num_minibatches_to_test):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
# Generate the training data file. Two generators exist; the simpler one is
# left here for reference:
#   createStandardizedData(data_file_path, num_records=100000)   # a very simple equation
createStandardizedData2(data_file_path, num_records=100000)  # a slightly complex equation

# --- Hyperparameters ---
input_dim = 3
output_dim = 1
hidden_dim = 10
learning_rate = 0.001
minibatch_size = 120

# --- Input and output shapes ---
feature = input((input_dim), np.float32)
label = input((output_dim), np.float32)

# --- Create model, reader and map ---
netout = create_model(input_dim, output_dim, hidden_dim, feature)
training_reader = create_reader(data_file_path, True, input_dim, output_dim)
input_map = {
    label: training_reader.streams.labels,
    feature: training_reader.streams.features,
}

# --- Set loss and evaluation functions ---
def __init__(self, in_shape, output_shape, device_id=None,
             learning_rate=0.00025, momentum=0.9,
             minibatch_size=32, update_interval=10000,
             n_workers=1, visualizer=None):
    """
    Q Neural Network following the Mnih et al. implementation and default options.

    The network has the following topology:
    Convolution(32, (8, 8))
    Convolution(64, (4, 4))
    Convolution(64, (2, 2))
    Dense(512)

    :param in_shape: Shape of the observations perceived by the learner (the neural net input)
    :param output_shape: Size of the action space (mapped to the number of output neurons)

    :param device_id: Use None to let CNTK select the best available device,
                      -1 for CPU, >= 0 for GPU
                      (default: None)

    :param learning_rate: Learning rate; must be > 0
                          (default: 0.00025, as per Mnih et al.)

    :param momentum: Momentum passed to the Adam learner; must satisfy
                     0 <= momentum < 1.
                     Note that CNTK does not currently provide an implementation
                     of Graves' RmsProp with momentum.
                     It uses AdamSGD optimizer instead.
                     (default: 0.9)
                     NOTE(review): this docstring previously described the value
                     as "without unit gain filter", but the learner below is
                     created with unit_gain=True - confirm which is intended.

    :param minibatch_size: Minibatch size (default: 32, as per Mnih et al.).
                           Not referenced in this constructor's body.

    :param update_interval: Stored as the target-network update interval
                            (default: 10000).
                            NOTE(review): presumably counted in training steps -
                            confirm against the code that reads it.

    :param n_workers: Number of concurrent worker for distributed training.
                      (default: 1, not distributed)

    :param visualizer: Optional visualizer allowing the model to save summary data
                       (default: None, no visualization)

    :raises NotImplementedError: if ``self.distributed_training`` is true.

    Ref: Mnih et al.: "Human-level control through deep reinforcement learning."
    Nature 518.7540 (2015): 529-533.
    """

    # Validate hyperparameters before any graph construction.
    assert learning_rate > 0, 'learning_rate should be > 0'
    assert 0. <= momentum < 1, 'momentum should be 0 <= momentum < 1'

    # Both base classes are initialised explicitly rather than via super().
    QModel.__init__(self, in_shape, output_shape)
    CntkModel.__init__(self, device_id, False, n_workers, visualizer)

    self._nb_actions = output_shape
    self._steps = 0
    self._target_update_interval = update_interval
    self._target = None

    # Input vars
    self._environment = input(in_shape, name='env',
                              dynamic_axes=(Axis.default_batch_axis()))
    self._q_targets = input(1, name='q_targets',
                            dynamic_axes=(Axis.default_batch_axis()))
    self._actions = input(output_shape, name='actions',
                          dynamic_axes=(Axis.default_batch_axis()))

    # Define the neural network graph
    self._model = self._build_model()(self._environment)
    # The target network is a frozen clone of the model on the same input.
    self._target = self._model.clone(
        CloneMethod.freeze, {self._environment: self._environment})

    # Define the learning rate
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)

    # AdamSGD optimizer
    m_schedule = momentum_schedule(momentum)
    vm_schedule = momentum_schedule(0.999)
    l_sgd = adam(self._model.parameters, lr_schedule,
                 momentum=m_schedule,
                 unit_gain=True,
                 variance_momentum=vm_schedule)

    # Distributed training is rejected only after the learner is built.
    if self.distributed_training:
        raise NotImplementedError('ASGD not implemented yet.')

    # _actions is a sparse 1-hot encoding of the actions done by the agent
    q_acted = reduce_sum(self._model * self._actions, axis=0)

    # Define the trainer with Huber Loss function
    criterion = huber_loss(q_acted, self._q_targets, 1.0)

    self._learner = l_sgd
    # Only a loss is supplied; the evaluation criterion slot is None.
    self._trainer = Trainer(self._model, (criterion, None), l_sgd)
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes,
                train_map_file, num_epochs, max_images=-1, freeze=False):
    """Train a transfer-learning model on the images listed in a map file.

    Args:
        base_model_file: Passed to ``create_model`` as the base model.
        feature_node_name: Passed to ``create_model``.
        last_hidden_node_name: Passed to ``create_model``.
        image_width: Width used for the image input and the minibatch source.
        image_height: Height used for the image input and the minibatch source.
        num_channels: Channel count used for the image input and the
            minibatch source.
        num_classes: Number of classes (size of the label input).
        train_map_file: Path of the training map file; its line count defines
            the epoch size.
        num_epochs: Number of epochs to train for.
        max_images: When > 0, caps the epoch size at this many samples.
        freeze: Passed to ``create_model``.

    Returns:
        The trained transfer-learning model.
    """
    # One line per training sample. The context manager closes the file
    # promptly instead of leaving the handle to the garbage collector.
    with open(train_map_file) as map_file:
        epoch_size = sum(1 for _ in map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height,
                                        num_channels, num_classes)
    image_input = input((num_channels, image_height, image_width))
    label_input = input(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name],
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name,
                            num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(
        num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            # Never request more samples than remain in the current epoch.
            data = minibatch_source.next_minibatch(
                min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model
def train_fast_rcnn(debug_output=False):
    """Train the Fast R-CNN model and return the trained network.

    Args:
        debug_output: When true, the network graph is plotted and the model is
            saved after every epoch under ``<abs_path>/Output``.

    Returns:
        The trained Fast R-CNN prediction model.
    """
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Minibatch source for the "train" data
    minibatch_source = create_mb_source(
        image_height, image_width, num_channels,
        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = input((num_channels, image_height, image_width))
    roi_input = input((num_rois, 4))
    label_input = input((num_rois, num_classes))

    # Map each network input to its reader stream
    input_map = {
        image_input: minibatch_source[features_stream_name],
        roi_input: minibatch_source[roi_stream_name],
        label_input: minibatch_source[label_stream_name],
    }

    # Fast R-CNN prediction model with loss and evaluation criterion
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Learning parameters: the per-sample rate steps down after epochs 10 and 15
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Trainer driven by momentum SGD with L2 regularization
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(
        frcn_output,
        (ce, pe),
        learner,
        ProgressPrinter(tag='Training', num_epochs=max_epochs),
    )

    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)

    for epoch in range(max_epochs):
        seen = 0
        while seen < epoch_size:
            # Never request more samples than remain in the current epoch.
            request = min(mb_size, epoch_size - seen)
            batch = minibatch_source.next_minibatch(request, input_map=input_map)
            trainer.train_minibatch(batch)
            seen += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        if debug_output:
            # Intermediate model files are numbered from 1.
            frcn_output.save(
                os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch + 1)))

    return frcn_output
def __init__(self, in_shape, output_shape, device_id=None,
             learning_rate=0.00025, momentum=0.9,
             minibatch_size=32, update_interval=10000,
             n_workers=1, visualizer=None):
    """
    Q Neural Network following the Mnih et al. implementation and default options.

    The network has the following topology:
    Convolution(32, (8, 8))
    Convolution(64, (4, 4))
    Convolution(64, (2, 2))
    Dense(512)

    :param in_shape: Shape of the observations perceived by the learner (the neural net input)
    :param output_shape: Size of the action space (mapped to the number of output neurons)

    :param device_id: Use None to let CNTK select the best available device,
                      -1 for CPU, >= 0 for GPU
                      (default: None)

    :param learning_rate: Learning rate; must be > 0
                          (default: 0.00025, as per Mnih et al.)

    :param momentum: Momentum passed to the Adam learner; must satisfy
                     0 <= momentum < 1.
                     Note that CNTK does not currently provide an implementation
                     of Graves' RmsProp with momentum.
                     It uses AdamSGD optimizer instead.
                     (default: 0.9)
                     NOTE(review): this docstring previously described the value
                     as "without unit gain filter", but the learner below is
                     created with unit_gain=True - confirm which is intended.

    :param minibatch_size: Minibatch size (default: 32, as per Mnih et al.).
                           Not referenced in this constructor's body.

    :param update_interval: Stored as the target-network update interval
                            (default: 10000).
                            NOTE(review): presumably counted in training steps -
                            confirm against the code that reads it.

    :param n_workers: Number of concurrent worker for distributed training.
                      (default: 1, not distributed)

    :param visualizer: Optional visualizer allowing the model to save summary data
                       (default: None, no visualization)

    :raises NotImplementedError: if ``self.distributed_training`` is true.

    Ref: Mnih et al.: "Human-level control through deep reinforcement learning."
    Nature 518.7540 (2015): 529-533.
    """

    # Validate hyperparameters before any graph construction.
    assert learning_rate > 0, 'learning_rate should be > 0'
    assert 0. <= momentum < 1, 'momentum should be 0 <= momentum < 1'

    # Both base classes are initialised explicitly rather than via super().
    QModel.__init__(self, in_shape, output_shape)
    CntkModel.__init__(self, device_id, False, n_workers, visualizer)

    self._nb_actions = output_shape
    self._steps = 0
    self._target_update_interval = update_interval
    self._target = None

    # Input vars
    self._environment = input(in_shape, name='env',
                              dynamic_axes=(Axis.default_batch_axis()))
    self._q_targets = input(1, name='q_targets',
                            dynamic_axes=(Axis.default_batch_axis()))
    self._actions = input(output_shape, name='actions',
                          dynamic_axes=(Axis.default_batch_axis()))

    # Define the neural network graph
    self._model = self._build_model()(self._environment)
    # The target network is a frozen clone of the model on the same input.
    self._target = self._model.clone(
        CloneMethod.freeze, {self._environment: self._environment}
    )

    # Define the learning rate
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)

    # AdamSGD optimizer
    m_schedule = momentum_schedule(momentum)
    vm_schedule = momentum_schedule(0.999)
    l_sgd = adam(self._model.parameters, lr_schedule,
                 momentum=m_schedule,
                 unit_gain=True,
                 variance_momentum=vm_schedule)

    # Distributed training is rejected only after the learner is built.
    if self.distributed_training:
        raise NotImplementedError('ASGD not implemented yet.')

    # _actions is a sparse 1-hot encoding of the actions done by the agent
    q_acted = reduce_sum(self._model * self._actions, axis=0)

    # Define the trainer with Huber Loss function
    criterion = huber_loss(q_acted, self._q_targets, 1.0)

    self._learner = l_sgd
    # Only a loss is supplied; the evaluation criterion slot is None.
    self._trainer = Trainer(self._model, (criterion, None), l_sgd)