def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    """Create a distributed Trainer for the ResNet CIFAR models.

    Args:
        network: dict with keys 'name', 'output', 'ce', 'pe' describing the model.
        minibatch_size: caller-supplied minibatch size (see NOTE below — it is
            overridden to 128 for the schedule reference).
        epoch_size: number of samples per epoch for the LR schedule.
        num_quantization_bits: gradient quantization bits (32 = no quantization).
        block_size: block momentum block size, or None for data-parallel SGD.
        warm_up: number of samples to train locally before going distributed.
        progress_printer: logging writer passed through to the Trainer.

    Returns:
        A CNTK Trainer wrapping a distributed learner.

    Raises:
        RuntimeError: for an unknown model name, or if block momentum is
            combined with gradient quantization.
    """
    if network['name'] == 'resnet20':
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif network['name'] == 'resnet110':
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        # BUG FIX: the original *returned* the exception object instead of
        # raising it, so callers silently received a RuntimeError instance
        # in place of a Trainer.
        raise RuntimeError("Unknown model name!")

    l2_reg_weight = 0.0001

    # Set learning parameters.
    # NOTE(review): this overrides the caller-supplied minibatch_size; kept
    # for behavioral compatibility — the schedules are referenced to 128.
    minibatch_size = 128
    lr_schedule = learning_parameter_schedule(lr_per_mb, minibatch_size=minibatch_size, epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size=minibatch_size)

    # learner object
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    local_learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):
    """Train `model` against `criterion` on minibatches drawn from `reader`.

    Saves a checkpoint after every epoch and returns the (loss, metric)
    summary of the final epoch.
    """
    minibatch_size = 64

    # Piecewise-constant per-sample learning-rate and momentum schedules.
    lr_schedule = learning_parameter_schedule(
        [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625],
        minibatch_size=1, epoch_size=epoch_size)
    mm_schedule = momentum_as_time_constant_schedule([0]*20 + [600]*20 + [1200], epoch_size=epoch_size)
    learner = momentum_sgd(model.parameters,
                           lr=lr_schedule,
                           momentum=mm_schedule,
                           l2_regularization_weight=0.002)

    # Trainer drives the updates; the model root is resolved from the criterion.
    trainer = Trainer(None, criterion, learner)

    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):                       # loop over epochs
        samples_seen = 0
        while samples_seen < epoch_size:                  # loop over minibatches in the epoch
            # Fetch the next minibatch, clipped so we never overrun the epoch.
            batch = reader.next_minibatch(min(minibatch_size, epoch_size - samples_seen))
            feed = {criterion.arguments[0]: batch[reader.streams.features],
                    criterion.arguments[1]: batch[reader.streams.labels]}
            trainer.train_minibatch(feed)
            samples_seen += batch[reader.streams.labels].num_samples  # samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    return loss, metric  # values from the last epoch
def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantization_bits, progress_printer):
    """Build a data-parallel distributed Trainer with a step-decaying LR.

    The learning rate is held for `adjust_interval` epochs, then multiplied
    by a fixed decay factor — matching the original Caffe schedule.
    """
    # CNTK weights new gradient by (1-momentum) for unit gain,
    # thus we divide Caffe's learning rate by (1-momentum)
    base_lr = 2.0  # equal to 0.2 in caffe
    base_lr *= minibatch_size / 128
    adjust_interval = 2
    decay_factor = 0.94

    # Set learning parameters: build the per-epoch learning-rate ladder.
    lr_per_mb = []
    current_rate = base_lr
    for _ in range(0, num_epochs, adjust_interval):
        lr_per_mb += [current_rate] * adjust_interval
        current_rate *= decay_factor

    lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9)
    l2_reg_weight = 0.0001  # CNTK L2 regularization is per sample, thus same as Caffe

    # Create learner: local momentum SGD wrapped by a data-parallel learner.
    local_learner = momentum_sgd(network['output'].parameters,
                                 lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)
    parameter_learner = data_parallel_distributed_learner(
        local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_printer)
def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):
    """Run momentum-SGD training over `reader`, checkpointing each epoch.

    Returns (loss, metric) from the final epoch's progress summary.
    """
    minibatch_size = 64

    # Learning parameters: piecewise-constant LR (per sample) and
    # time-constant momentum schedules.
    lr_values = ([0.0015625] * 20 + [0.00046875] * 20 + [0.00015625] * 20
                 + [0.000046875] * 10 + [0.000015625])
    learner = momentum_sgd(
        model.parameters,
        lr=learning_parameter_schedule(lr_values, minibatch_size=1, epoch_size=epoch_size),
        momentum=momentum_as_time_constant_schedule([0] * 20 + [600] * 20 + [1200],
                                                    epoch_size=epoch_size),
        l2_regularization_weight=0.002)

    # Trainer object; the model root is inferred from the criterion.
    trainer = Trainer(None, criterion, learner)

    # Perform model training.
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):
        done = 0
        while done < epoch_size:
            # Fetch a minibatch, never reading past the epoch boundary.
            minibatch = reader.next_minibatch(min(minibatch_size, epoch_size - done))
            trainer.train_minibatch({criterion.arguments[0]: minibatch[reader.streams.features],
                                     criterion.arguments[1]: minibatch[reader.streams.labels]})
            done += minibatch[reader.streams.labels].num_samples
            progress_printer.update_with_trainer(trainer, with_metric=True)
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    return loss, metric
def Evaluator(criterion):
    """Wrap `criterion` in a Trainer that is only used for evaluation."""
    loss, metric = Trainer._get_loss_metric(criterion)
    params = set(loss.parameters)
    if metric:
        params = params | set(metric.parameters)
    # The learner never performs updates; it only satisfies Trainer's API.
    dummy_learner = momentum_sgd(tuple(params),
                                 lr=learning_parameter_schedule(1),
                                 momentum=momentum_schedule(0))
    return Trainer(None, (loss, metric), dummy_learner)
def Evaluator(model, criterion):
    """Build a Trainer around `model`/`criterion` for evaluation only."""
    from cntk import Trainer
    from cntk.learners import momentum_sgd, momentum_schedule_per_sample
    loss, metric = Trainer._get_loss_metric(criterion)
    # Collect every parameter reachable from the loss, model, and metric.
    params = set(loss.parameters)
    if model:
        params |= set(model.parameters)
    if metric:
        params |= set(metric.parameters)
    # Placeholder learner — required by Trainer's signature, never steps.
    dummy_learner = momentum_sgd(tuple(params),
                                 lr=learning_parameter_schedule(1),
                                 momentum=momentum_schedule_per_sample(0))
    return Trainer(model, (loss, metric), dummy_learner)
def Evaluator(model, criterion):
    """Return an evaluation-only Trainer for the given model and criterion."""
    from cntk import Trainer
    from cntk.learners import momentum_sgd, momentum_schedule_per_sample
    loss, metric = Trainer._get_loss_metric(criterion)
    parameter_set = set(loss.parameters)
    if model:
        parameter_set.update(model.parameters)
    if metric:
        parameter_set.update(metric.parameters)
    # A dummy learner with inert schedules; Trainer requires one even
    # when no training is performed.
    placeholder_learner = momentum_sgd(
        tuple(parameter_set),
        lr=learning_parameter_schedule(1),
        momentum=momentum_schedule_per_sample(0))
    return Trainer(model, (loss, metric), placeholder_learner)
def test_learner_update():
    """Verify sgd advances its LR schedule per epoch and after a reset."""
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    res = i * w

    # Schedule: 0.1 for the first 50 epochs, then 0.2 (epoch_size=1 sample).
    schedule = C.learning_parameter_schedule([0.1]*50 + [0.2]*50,
                                             minibatch_size=1, epoch_size=1)
    learner = sgd(res.parameters, lr=schedule)
    assert learner.learning_rate() == 0.1
    x = learner.update({w: np.asarray([[2.]], dtype=np.float32)}, 100)
    assert learner.learning_rate() == 0.2
    assert w.value < w_init  # the update must have moved the weight down

    # Resetting the schedule restarts progression from its first value.
    learner.reset_learning_rate(
        learning_parameter_schedule([0.3]*50 + [0.4]*50, minibatch_size=1, epoch_size=1))
    assert learner.learning_rate() == 0.3
    x = learner.update({w: np.asarray([[2.]], dtype=np.float32)}, 100)
    assert learner.learning_rate() == 0.4
def ffnet():
    """Train a small feed-forward classifier on random data; return test error."""
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data.
    feature = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Two sigmoid hidden layers followed by a linear output layer.
    netout = Sequential([
        For(range(num_hidden_layers),
            lambda i: Dense(hidden_layers_dim, activation=sigmoid)),
        Dense(num_output_classes)])(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    # Instantiate the trainer object to drive the model training.
    lr_per_minibatch = learning_parameter_schedule(0.5)
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Feed freshly generated random minibatches to the trainer.
    minibatch_size = 25
    for _ in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
        trainer.train_minibatch({feature: features, label: labels})

    trainer.summarize_training_progress()

    # Evaluate on one held-out random batch.
    test_features, test_labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
    avg_error = trainer.test_minibatch({feature: test_features, label: test_labels})
    return avg_error
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    """Create a distributed Trainer for the ResNet CIFAR models.

    Args:
        network: dict with keys 'name', 'output', 'ce', 'pe'.
        minibatch_size: caller-supplied minibatch size (overridden to 128
            below for the schedule reference — see NOTE).
        epoch_size: samples per epoch for the LR schedule.
        num_quantization_bits: gradient quantization bits (32 = none).
        block_size: block momentum block size, or None for data-parallel SGD.
        warm_up: samples to train locally before distributing.
        progress_printer: logging writer forwarded to the Trainer.

    Returns:
        A CNTK Trainer wrapping a distributed learner.

    Raises:
        RuntimeError: for an unknown model name, or when block momentum is
            combined with gradient quantization.
    """
    if network['name'] == 'resnet20':
        lr_per_mb = [1.0] * 80 + [0.1] * 40 + [0.01]
    elif network['name'] == 'resnet110':
        lr_per_mb = [0.1] * 1 + [1.0] * 80 + [0.1] * 40 + [0.01]
    else:
        # BUG FIX: the original *returned* the exception instead of raising
        # it, handing callers a RuntimeError object in place of a Trainer.
        raise RuntimeError("Unknown model name!")

    l2_reg_weight = 0.0001

    # Set learning parameters.
    # NOTE(review): overrides the caller-supplied minibatch_size; kept for
    # behavioral compatibility — schedules are referenced to 128.
    minibatch_size = 128
    lr_schedule = learning_parameter_schedule(lr_per_mb,
                                              minibatch_size=minibatch_size,
                                              epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size=minibatch_size)

    # learner object
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option."
        )

    local_learner = momentum_sgd(network['output'].parameters,
                                 lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
def ffnet():
    """Build, train, and test a tiny dense network on synthetic data.

    Returns the average classification error on one random test batch.
    """
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data.
    feature = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Stack the hidden layers, then the linear classification head.
    hidden_stack = For(range(num_hidden_layers),
                       lambda i: Dense(hidden_layers_dim, activation=sigmoid))
    netout = Sequential([hidden_stack, Dense(num_output_classes)])(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_parameter_schedule(0.5)
    # Instantiate the trainer object to drive the model training.
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Train on 1024 freshly generated random minibatches.
    minibatch_size = 25
    for i in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim,
                                                num_output_classes)
        # Map the model's input variables to this batch's data.
        trainer.train_minibatch({feature: features, label: labels})

    trainer.summarize_training_progress()

    test_features, test_labels = generate_random_data(minibatch_size, input_dim,
                                                      num_output_classes)
    avg_error = trainer.test_minibatch({feature: test_features,
                                        label: test_labels})
    return avg_error
def train_model(base_model_file, feature_node_name, last_hidden_node_name, image_width, image_height, num_channels, num_classes, train_map_file, num_epochs, max_images=-1, freeze=False):
    """Train a transfer-learning model from a pre-trained base network.

    Reads images listed in `train_map_file`, clones `base_model_file` with a
    new classification head (`create_model`), trains for `num_epochs`, and
    saves moving-average loss/error plots.  Relies on module-level settings
    (lr_per_mb, momentum_per_mb, l2_reg_weight, mb_size, log_file_name,
    features_stream_name, label_stream_name, output_figure_loss,
    output_figure_error) defined elsewhere in the file.

    Returns the trained model Function.
    """
    # Epoch size = number of listed images, optionally capped by max_images.
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_parameter_schedule(lr_per_mb)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', log_to_file=log_file_name, num_epochs=num_epochs)
    #progress_printer = ProgressPrinter(tag='Training', log_to_file=log_file_name, num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    batch_index = 0
    # Per-minibatch loss/error history used for the plots below.
    plot_data = {'batchindex': list(), 'loss': list(), 'error': list()}
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):
        # loop over minibatches in the epoch
        sample_count = 0
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            #if sample_count % (100 * mb_size) == 0:
            #    print ("Processed {0} samples".format(sample_count))

            # For visualization...
            #print("type of plot_data:", type(plot_data), type(plot_data['batchindex']), type(plot_data['loss']),type(plot_data['error']))
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(trainer.previous_minibatch_loss_average)
            plot_data['error'].append(trainer.previous_minibatch_evaluation_average)
            batch_index += 1
        trainer.summarize_training_progress()

    # Visualize training result:
    window_width = 32
    # Cumulative sums (with a leading 0) make the moving average below O(n).
    loss_cumsum = np.cumsum(np.insert(plot_data['loss'], 0, 0))
    error_cumsum = np.cumsum(np.insert(plot_data['error'], 0, 0))

    # Moving average.
    plot_data['batchindex'] = np.insert(plot_data['batchindex'], 0, 0)[window_width:]
    plot_data['avg_loss'] = (loss_cumsum[window_width:] - loss_cumsum[:-window_width]) / window_width
    plot_data['avg_error'] = (error_cumsum[window_width:] - error_cumsum[:-window_width]) / window_width

    plt.figure(1)
    #plt.subplot(211)
    plt.plot(plot_data["batchindex"], plot_data["avg_loss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss ')
    #plt.show()
    plt.savefig(output_figure_loss, bbox_inches='tight' )

    plt.figure(2)
    #plt.subplot(212)
    plt.plot(plot_data["batchindex"], plot_data["avg_error"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error ')
    #plt.show()
    plt.savefig(output_figure_error, bbox_inches='tight')

    return tl_model
def train(self, path_to_folder: str, model_definition, epochs: int = 10, output_model_path: str = "driver_model.onnx"):
    """Train a driving model from an image dataset folder and export to ONNX.

    Expects `path_to_folder` to contain train_map.txt, test_map.txt and
    meanfile.xml (as produced by create_balanced_dataset()).  The class set
    and sample counts are derived by scanning the map files.  After training,
    the test split is evaluated and the model is saved at
    `output_model_path` in ONNX format.
    """
    path_to_folder = path_to_folder.rstrip('/')
    map_file_train = path_to_folder + "/train_map.txt"
    map_file_test = path_to_folder + "/test_map.txt"
    mean_file = path_to_folder + "/meanfile.xml"
    classes_set = set()
    num_train = 0
    num_test = 0
    num_channels = 3
    # Scan the train map to discover the label set and the training size.
    try:
        with open(map_file_train) as f:
            csv_reader = csv.reader(f, delimiter='\t')
            for row in csv_reader:
                cmd = row[1]
                classes_set.add(cmd)
                num_train = num_train + 1
    except Exception as e:
        raise Exception(
            "No train_map.txt file found in path " + path_to_folder +
            ". Did you create a dataset using create_balanced_dataset()?")
    num_classes = len(classes_set)
    # num_test ends up as (line count - 1); the loop body is intentionally empty.
    with open(map_file_test) as f:
        for num_test, l in enumerate(f):
            pass
    transforms = [
        xforms.scale(width=self.__image_width, height=self.__image_height,
                     channels=num_channels, interpolations='linear'),
        xforms.mean(mean_file),
    ]
    # ImageDeserializer loads images in the BGR format, not RGB
    reader_train = MinibatchSource(
        ImageDeserializer(
            map_file_train,
            StreamDefs(features=StreamDef(field='image', transforms=transforms),
                       labels=StreamDef(field='label', shape=num_classes))))
    reader_test = MinibatchSource(
        ImageDeserializer(
            map_file_test,
            StreamDefs(features=StreamDef(field='image', transforms=transforms),
                       labels=StreamDef(field='label', shape=num_classes))))
    input_var = input_variable(
        (num_channels, self.__image_height, self.__image_width))
    label_var = input_variable((num_classes))
    # Normalize the input
    # NOTE(review): input_var_norm is computed but never fed to the model —
    # model_definition receives the raw input_var; confirm this is intended.
    feature_scale = 1.0 / 256.0
    input_var_norm = element_times(feature_scale, input_var)
    model = model_definition(input_var)
    ce = cross_entropy_with_softmax(model, label_var)
    pe = classification_error(model, label_var)
    epoch_size = num_train
    minibatch_size = 64
    # Stepped LR schedule: 0.01 for 10 epochs, 0.003 for 10, then 0.001.
    lr_per_minibatch = learning_parameter_schedule([0.01] * 10 + [0.003] * 10 + [0.001],
                                                   epoch_size=epoch_size)
    momentums = momentum_schedule(0.9, minibatch_size=minibatch_size)
    l2_reg_weight = 0.001
    learner = momentum_sgd(model.parameters,
                           lr=lr_per_minibatch,
                           momentum=momentums,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs)
    trainer = cntk.train.Trainer(model, (ce, pe), [learner],
                                 [progress_printer])
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }
    batch_index = 0
    plot_data = {'batchindex': [], 'loss': [], 'error': []}
    # Training loop: one pass over num_train samples per epoch.
    for epoch in range(epochs):
        sample_count = 0
        while sample_count < epoch_size:
            data = reader_train.next_minibatch(min(
                minibatch_size, epoch_size - sample_count),
                                               input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += data[label_var].num_samples
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(
                trainer.previous_minibatch_loss_average)
            plot_data['error'].append(
                trainer.previous_minibatch_evaluation_average)
            batch_index += 1
        trainer.summarize_training_progress()
    # Evaluation on the test split.
    # NOTE(review): input_map was built from reader_train's streams but is
    # passed to reader_test.next_minibatch below — confirm this maps
    # correctly for the test source.
    epoch_size = num_test
    minibatch_size = 16
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0
    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        # Weight each minibatch error by its size for the aggregate metric.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        sample_count += data[label_var].num_samples
        minibatch_index += 1
    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")
    model.save(output_model_path, format=ModelFormat.ONNX)
def __train_cntk(self, path_to_folder: str, model_definition, epochs: int, output_model_path: str, classes, minibatch_size: int):
    """Train a model with CNTK using a custom UserDeserializer and export ONNX.

    Unlike the ImageDeserializer path (commented out below), images are
    loaded and normalized in Python via the nested TrackDataset class.
    `classes` supplies the label ordering used for one-hot encoding.
    """
    import cntk
    from cntk.learners import learning_parameter_schedule
    from cntk.ops import input_variable
    from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef, MinibatchData, UserDeserializer
    import cntk.io.transforms as xforms
    from cntk.layers import default_options, Dense, Sequential, Activation, Embedding, Convolution2D, MaxPooling, Stabilizer, Convolution, Dropout, BatchNormalization
    from cntk.ops.functions import CloneMethod
    from cntk.logging import ProgressPrinter
    from cntk.losses import cross_entropy_with_softmax
    from cntk import classification_error, softmax, relu, ModelFormat, element_times, momentum_schedule, momentum_sgd
    import pandas as pd

    path_to_folder = path_to_folder.rstrip('/')
    map_file_train = path_to_folder + "/train_map.txt"
    map_file_test = path_to_folder + "/test_map.txt"
    classes_set = set()
    num_train = 0
    num_test = 0
    num_channels = 3

    class TrackDataset(UserDeserializer):
        """Chunked deserializer that reads a tab-separated map file of
        (image path, label) rows and yields normalized image tensors.

        Note: num_classes and classes are captured from the enclosing scope;
        num_classes is assigned later but before any chunk is fetched.
        """
        def __init__(self, map_file, streams, chunksize=100):
            super(TrackDataset, self).__init__()
            self._batch_size = chunksize
            self.dataframes = pd.read_csv(map_file, sep='\t', dtype=str, header=None, names=["features", "labels"])
            self._streams = [
                cntk.io.StreamInformation(s['name'], i, 'dense', np.float32, s['shape'])
                for i, s in enumerate(streams)
            ]
            # Total chunks = ceil(rows / chunksize).
            self._num_chunks = int(
                math.ceil(len(self.dataframes) / chunksize))

        def _scale_image(self, image, width=224, height=168):
            # NOTE(review): Image.LINEAR is a deprecated PIL alias — confirm
            # the installed Pillow version still exposes it.
            try:
                return image.resize((width, height), Image.LINEAR)
            except:
                raise Exception('scale_image error')

        def stream_infos(self):
            return self._streams

        def num_chunks(self):
            return self._num_chunks

        def get_chunk(self, chunk_id):
            # Load, resize, CHW-transpose and standardize each image in
            # the chunk; labels become one-hot vectors over `classes`.
            images = []
            labels = []
            maximum = (chunk_id + 1) * self._batch_size
            if (maximum > len(self.dataframes)):
                maximum = len(self.dataframes)
            for i in range(chunk_id * self._batch_size, maximum):
                img_name = self.dataframes.iloc[i, 0]
                image = Image.open(img_name)
                cl = self.dataframes.iloc[i, 1:].values[0]
                image = self._scale_image(image)
                # HWC -> CHW, then per-image mean/std normalization.
                image = np.moveaxis((np.array(image).astype('float32')), -1, 0)
                image -= np.mean(image, keepdims=True)
                image /= (np.std(image, keepdims=True) + 1e-6)
                images.append(image)
                yv = np.zeros(num_classes)
                yv[classes.index(cl)] = 1
                labels.append(yv)
            result = {}
            features = np.array(images)
            lab = np.array(labels).astype('float32')
            result[self._streams[0].m_name] = features
            result[self._streams[1].m_name] = lab
            return result

    # Scan the train map for the label set and the number of train samples.
    try:
        with open(map_file_train) as f:
            csv_reader = csv.reader(f, delimiter='\t')
            for row in csv_reader:
                cmd = row[1]
                classes_set.add(cmd)
                num_train = num_train + 1
    except Exception as e:
        raise Exception(
            "No train_map.txt file found in path " + path_to_folder +
            ". Did you create a dataset using create_balanced_dataset()?")
    # Class count comes from the caller-supplied `classes`, not classes_set.
    num_classes = len(classes)
    # num_test ends up as (line count - 1); the loop body is intentionally empty.
    with open(map_file_test) as f:
        for num_test, l in enumerate(f):
            pass
    # transforms = [
    #     xforms.scale(width=self.__image_width, height=self.__image_height, channels=num_channels, interpolations='linear'),
    #     xforms.mean(mean_file)
    # ]
    dataset_train = TrackDataset(map_file=map_file_train, streams=[
        dict(name='features', shape=(num_channels, self.__image_height, self.__image_width)),
        dict(name='labels', shape=(num_classes, ))
    ])
    reader_train = MinibatchSource([dataset_train], randomize=True)
    # a = dataset_train.num_chunks()
    dataset_test = TrackDataset(map_file=map_file_test, streams=[
        dict(name='features', shape=(num_channels, self.__image_height, self.__image_width)),
        dict(name='labels', shape=(num_classes, ))
    ])
    reader_test = MinibatchSource([dataset_test], randomize=True)
    # ImageDeserializer loads images in the BGR format, not RGB
    # reader_train = MinibatchSource(ImageDeserializer(map_file_train, StreamDefs(
    #     features = StreamDef(field='image', transforms=transforms),
    #     labels = StreamDef(field='label', shape=num_classes)
    # )))
    # reader_test = MinibatchSource(ImageDeserializer(map_file_test, StreamDefs(
    #     features = StreamDef(field='image', transforms=transforms),
    #     labels = StreamDef(field='label', shape=num_classes)
    # )))
    # mb = reader_train.next_minibatch(10)
    input_var = input_variable(
        (num_channels, self.__image_height, self.__image_width))
    label_var = input_variable((num_classes))
    model = model_definition(input_var)
    ce = cross_entropy_with_softmax(model, label_var)
    pe = classification_error(model, label_var)
    epoch_size = num_train
    # Stepped LR schedule: 0.01 for 10 epochs, 0.003 for 10, then 0.001.
    lr_per_minibatch = learning_parameter_schedule([0.01] * 10 + [0.003] * 10 + [0.001],
                                                   epoch_size=epoch_size)
    momentums = momentum_schedule(0.9, minibatch_size=minibatch_size)
    l2_reg_weight = 0.001
    learner = momentum_sgd(model.parameters,
                           lr=lr_per_minibatch,
                           momentum=momentums,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs)
    trainer = cntk.train.Trainer(model, (ce, pe), [learner],
                                 [progress_printer])
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }
    print("Training started")
    batch_index = 0
    plot_data = {'batchindex': [], 'loss': [], 'error': []}
    # Training loop: one pass over num_train samples per epoch.
    for epoch in range(epochs):
        sample_count = 0
        while sample_count < epoch_size:
            data: MinibatchSource = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += data[label_var].num_samples
            batch_index += 1
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(
                trainer.previous_minibatch_loss_average)
            plot_data['error'].append(
                trainer.previous_minibatch_evaluation_average)
        trainer.summarize_training_progress()
    # Evaluation on the test split.
    # NOTE(review): input_map was built from reader_train's streams but is
    # passed to reader_test.next_minibatch below — confirm this maps
    # correctly for the test source.
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0
    epoch_size = num_test
    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        # Weight each minibatch error by its size for the aggregate metric.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        sample_count += data[label_var].num_samples
        minibatch_index += 1
    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")
    model.save(output_model_path, format=ModelFormat.ONNX)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    """Parametrized check: schedule reports the expected reference
    minibatch size and yields the expected per-step values."""
    schedule = learning_parameter_schedule(*params)
    assert schedule.minibatch_size == minibatch_size
    observed = [schedule[step] for step in range(len(expectation))]
    assert observed == expectation
def test_learner_init_legacy():
    """Back-compatibility tests for the deprecated UnitType-based learning
    rate schedules and legacy learner construction APIs."""
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))
    res = i * w

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in future version: This is logical invalid combination but it was the only way to use mean gradient and set learning rate in the past.
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample), use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    #test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # for backcompatibility test, in reset learning rate, the learner won't receive the reference minibatch size from the schedule
    # user will need to specify the reference minibatch size explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # Learner parameters are exposed as cntk Parameter objects.
    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # back compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    # All three call forms (positional, positional unit_gain, keyword) must
    # remain accepted.
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    # The process-wide default unit-gain flag is settable and readable.
    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # Legacy (count, value) pair schedules with per-sample unit.
    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], unit=UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    # rmsprop/adadelta legacy construction still accepts these schedules.
    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2], unit=UnitType.sample, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
def test_learner_init():
    """Exercise the new ``learning_parameter_schedule`` API.

    Covers how the reference minibatch size is resolved between a learner and
    its learning-rate schedule, the compatible-mode flag, the per-learner
    factory functions (sgd / momentum_sgd / adadelta / adam / adagrad /
    fsadagrad / nesterov / rmsprop), and epoch-varying schedules.
    """
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))
    res = i * w

    def _check(lrn, mbs, schedule_mbs, compatible):
        # Shared assertions: compatible-mode flag, the learner's reference
        # minibatch size (skipped when mbs is None, matching the one original
        # case that did not assert it), the reference minibatch size recorded
        # on the schedule, and the effective learning rate.
        assert lrn.is_compatible_mode() == compatible
        if mbs is not None:
            assert lrn.minibatch_size == mbs
        assert lrn._learning_rate_schedule.minibatch_size == schedule_mbs
        assert lrn.learning_rate() == 0.1

    # Explicit reference minibatch size with a plain-number learning rate:
    # the schedule inherits the learner's reference minibatch size.
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    _check(learner, 25, 25, False)

    # No explicit reference minibatch size, learning rate given as a schedule.
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    _check(learner, C.learners.IGNORE, C.learners.IGNORE, False)

    # Schedule carries its own reference minibatch size (20); the schedule
    # keeps it even though the learner-level value is 25.
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=25)
    _check(learner, 25, 20, False)

    # Schedule-level reference minibatch size only.
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    _check(learner, None, 20, False)

    # (Kept from the original test body) repeated schedule-only check.
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    _check(learner, C.learners.IGNORE, C.learners.IGNORE, False)

    # minibatch_size=IGNORE switches the learner into compatible mode.
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    _check(learner, C.learners.IGNORE, C.learners.IGNORE, True)

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=C.learners.IGNORE)
    _check(learner, C.learners.IGNORE, 20, True)

    # (Kept from the original test body) repeated IGNORE check; this also
    # leaves ``learner`` bound for the Parameter check further down.
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    _check(learner, C.learners.IGNORE, C.learners.IGNORE, True)

    def _check_const_lr(lrn):
        # Factories called with lr=0.4, minibatch_size=32 must propagate the
        # reference minibatch size to both the learner and its schedule.
        assert lrn.minibatch_size == 32
        assert lrn._learning_rate_schedule.minibatch_size == 32
        assert lrn.learning_rate() == 0.4

    _check_const_lr(C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32))
    _check_const_lr(C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32))
    _check_const_lr(C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32))
    _check_const_lr(C.adam(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32))
    _check_const_lr(C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32))
    _check_const_lr(C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32))
    _check_const_lr(C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32))
    _check_const_lr(C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8, minibatch_size=32))

    def _check_lr_schedule(lrn):
        # Factories called with lr=[0.4, 0.1, 0.001], epoch_size=512: the
        # schedule must step down at each 512-sample epoch boundary.
        assert lrn.minibatch_size == 32
        assert lrn._learning_rate_schedule.minibatch_size == 32
        assert lrn._learning_rate_schedule[0] == 0.4
        assert lrn._learning_rate_schedule[512] == 0.1
        assert lrn._learning_rate_schedule[512 * 2] == 0.001

    _check_lr_schedule(C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512))
    _check_lr_schedule(C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9], minibatch_size=32, epoch_size=512))
    _check_lr_schedule(C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512))
    _check_lr_schedule(C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9], minibatch_size=32, epoch_size=512))
    _check_lr_schedule(C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512))
    # BUG FIX: the original asserted on ``myadagrad`` after constructing the
    # fsadagrad learner, so the fsadagrad learner was never actually checked.
    _check_lr_schedule(C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9], variance_momentum=[0.9], minibatch_size=32, epoch_size=512))
    _check_lr_schedule(C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9], minibatch_size=32, epoch_size=512))
    _check_lr_schedule(C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8, minibatch_size=32, epoch_size=512))

    # A learner exposes its parameters as cntk Parameter objects.
    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    # Unit-gain default is process-global state; toggle it and verify the
    # factories accept the flag positionally and by keyword.
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)
    C.adadelta(res.parameters, lr_per_sample)
def test_learner_init():
    """Exercise the new ``learning_parameter_schedule`` API (table-driven).

    Checks reference-minibatch-size resolution between a learner and its
    learning-rate schedule, the compatible-mode flag, every learner factory
    with a constant learning rate, epoch-varying schedules, and the
    process-global unit-gain flag.
    """
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))
    res = i * w

    # Each case: sgd kwargs, expected learner minibatch size (None = do not
    # assert it, matching the one original case that skipped that check),
    # expected schedule minibatch size, expected compatible-mode flag.
    # Duplicated cases are kept for parity with the original test body.
    cases = [
        (dict(lr=0.1, minibatch_size=25), 25, 25, False),
        (dict(lr=learning_parameter_schedule(0.1)), C.learners.IGNORE, C.learners.IGNORE, False),
        (dict(lr=learning_parameter_schedule(0.1, 20), minibatch_size=25), 25, 20, False),
        (dict(lr=learning_parameter_schedule(0.1, 20)), None, 20, False),
        (dict(lr=learning_parameter_schedule(0.1)), C.learners.IGNORE, C.learners.IGNORE, False),
        (dict(lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE), C.learners.IGNORE, C.learners.IGNORE, True),
        (dict(lr=learning_parameter_schedule(0.1, 20), minibatch_size=C.learners.IGNORE), C.learners.IGNORE, 20, True),
        (dict(lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE), C.learners.IGNORE, C.learners.IGNORE, True),
    ]
    for kwargs, expect_mbs, expect_schedule_mbs, expect_compat in cases:
        # ``learner`` intentionally keeps the last case's binding; it is used
        # again for the Parameter check below.
        learner = sgd(res.parameters, **kwargs)
        assert learner.is_compatible_mode() == expect_compat
        if expect_mbs is not None:
            assert learner.minibatch_size == expect_mbs
        assert learner._learning_rate_schedule.minibatch_size == expect_schedule_mbs
        assert learner.learning_rate() == 0.1

    # Every factory with a constant lr=0.4 and minibatch_size=32: the
    # reference minibatch size must land on both learner and schedule.
    const_lr_factories = [
        (C.sgd, {}),
        (C.momentum_sgd, dict(momentum=0.9)),
        (C.adadelta, {}),
        (C.adam, dict(momentum=0.9, variance_momentum=0.9)),
        (C.adagrad, {}),
        (C.fsadagrad, dict(momentum=0.9, variance_momentum=0.9)),
        (C.nesterov, dict(momentum=0.9)),
        (C.rmsprop, dict(gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8)),
    ]
    for factory, extra in const_lr_factories:
        lrn = factory(parameters=res.parameters, lr=0.4, minibatch_size=32, **extra)
        assert lrn.minibatch_size == 32
        assert lrn._learning_rate_schedule.minibatch_size == 32
        assert lrn.learning_rate() == 0.4

    # Every factory with an epoch-varying schedule: rates must step down at
    # each 512-sample epoch boundary.
    # BUG FIX: the original asserted on the previous adagrad learner after
    # constructing fsadagrad, leaving the fsadagrad learner unchecked; the
    # loop now checks each constructed learner.
    schedule_factories = [
        (C.sgd, {}),
        (C.momentum_sgd, dict(momentum=[0.9])),
        (C.adadelta, {}),
        (C.adam, dict(momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9])),
        (C.adagrad, {}),
        (C.fsadagrad, dict(momentum=[0.9], variance_momentum=[0.9])),
        (C.nesterov, dict(momentum=[0.9])),
        (C.rmsprop, dict(gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8)),
    ]
    for factory, extra in schedule_factories:
        lrn = factory(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512, **extra)
        assert lrn.minibatch_size == 32
        assert lrn._learning_rate_schedule.minibatch_size == 32
        assert lrn._learning_rate_schedule[0] == 0.4
        assert lrn._learning_rate_schedule[512] == 0.1
        assert lrn._learning_rate_schedule[512 * 2] == 0.001

    # A learner exposes its parameters as cntk Parameter objects.
    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    # Unit-gain default is process-global state; toggle it and verify the
    # factories accept the flag positionally and by keyword.
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)
    C.adadelta(res.parameters, lr_per_sample)
def train_model(cntkModel, params, input_map):
    """Train ``cntkModel`` with momentum SGD and track per-epoch error.

    params: dict read for 'mb_size', 'num_epochs', 'epoch_size_train',
        'epoch_size_test', 'train_mbs', 'valid_mbs', 'learn_rate',
        'beta_momentum_gd', 'l2_reg_weight', 'ce', 'pe'.
    input_map: mapping passed through to ``next_minibatch`` to bind reader
        streams to network inputs.
    Returns the (trained, mutated in place) ``cntkModel``.
    Side effects: logs progress, and draws a matplotlib error plot
    (training in blue, validation in green).
    """
    log = logging.getLogger("neuralnets1.utils.train_model")
    mb_size = params['mb_size']
    num_epochs = params['num_epochs']
    epoch_size_train = params['epoch_size_train']
    epoch_size_test = params['epoch_size_test']
    minibatch_source_train = params['train_mbs']
    minibatch_source_valid = params['valid_mbs']
    #minibatch_source_test = params['test_mbs'] ;

    # Instantiate the trainer object
    #lr_schedule = learning_rate_schedule(params['learn_rate'], unit=UnitType.minibatch)
    lr_per_minibatch = learning_parameter_schedule(params['learn_rate'],
                                                   minibatch_size=mb_size,
                                                   epoch_size=epoch_size_train)
    mm_schedule = momentum_schedule(params['beta_momentum_gd'])
    learner = momentum_sgd(cntkModel.parameters, lr_per_minibatch, mm_schedule,
                           l2_regularization_weight=params['l2_reg_weight'])
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=num_epochs)]
    trainer = Trainer(cntkModel, (params['ce'], params['pe']), learner, progress_writers)

    # Run training epochs
    log.info(
        'Training transfer learning model for %s epochs (epoch_size_train = %s ) .'
        % (num_epochs, epoch_size_train))
    # print("Training transfer learning model for {0} epochs (epoch_size_train = {1}).".format(num_epochs, epoch_size_train))
    errsVal = []    # per-epoch validation error
    errsTrain = []  # per-epoch training error (sample-weighted average)
    log_number_of_parameters(cntkModel)
    for epoch in range(num_epochs):
        err_numer = 0       # running sum of (minibatch error * minibatch size)
        sample_counts = 0   # samples consumed in this epoch
        while sample_counts < epoch_size_train:  # Loop over minibatches in the epoch
            sample_count = min(mb_size, epoch_size_train - sample_counts)
            data = minibatch_source_train.next_minibatch(sample_count, input_map=input_map)
            trainer.train_minibatch(data)        # Update model with it
            sample_counts += sample_count        # Count samples processed so far
            # Weight each minibatch's eval average by its size so the epoch
            # error below is a proper per-sample average.
            err_numer += trainer.previous_minibatch_evaluation_average * sample_count
            # NOTE(review): fires only when sample_counts is an exact multiple
            # of 100*mb_size, i.e. roughly every 100 minibatches.
            if sample_counts % (100 * mb_size) == 0:
                log.info("Training: processed %s samples" % sample_counts)

        # Compute accuracy on training and test sets
        errsTrain.append(err_numer / float(sample_counts))
        trainer.summarize_training_progress()
        errsVal.append(
            cntkComputeTestError(trainer, minibatch_source_valid, mb_size,
                                 epoch_size_test, input_map))
        trainer.summarize_test_progress()

        # Plot training progress
        plt.plot(errsTrain, 'b-', errsVal, 'g-')
        plt.xlabel('Epoch number')
        plt.ylabel('Error')
        plt.title('Training error (blue), validation error (green)')
        plt.draw()
    return cntkModel
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    """Train a transfer-learning model built on top of ``base_model_file``.

    Args:
        base_model_file: path of the pre-trained base model.
        feature_node_name / last_hidden_node_name: node names handed to
            ``create_model`` to splice the new head onto the base network.
        image_width / image_height / num_channels: input image geometry.
        num_classes: size of the new classification head.
        train_map_file: CNTK map file; one line per training image.
        num_epochs: number of passes over the (possibly capped) epoch.
        max_images: if > 0, cap the epoch size at this many images.
        freeze: if True, ``create_model`` freezes the base-model weights.

    Returns the trained transfer-learning model function.

    NOTE(review): relies on module-level globals not visible in this block
    (``mb_size``, ``lr_per_mb``, ``momentum_per_mb``, ``l2_reg_weight``,
    ``features_stream_name``, ``label_stream_name``, ``create_mb_source``,
    ``create_model``) — confirm they are defined at module scope.
    """
    # BUG FIX: the original called open() without closing the file handle;
    # use a context manager so the map file is always released.
    with open(train_map_file) as map_file:
        epoch_size = sum(1 for line in map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height,
                                        num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name,
                            num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_parameter_schedule(lr_per_mb)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".
          format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(
                mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))
        trainer.summarize_training_progress()
    return tl_model
x = C.input_variable(shape=(2, ), needs_gradient=False) t = C.input_variable(shape=(3, ), needs_gradient=False) init = C.initializer.normal(0.01) with C.layers.default_options(init=init): z = C.layers.Sequential( [C.layers.Dense(12, activation=C.relu), C.layers.Dense(3)]) y = C.cross_entropy_with_softmax(z(x), t) acc = C.classification_error(z(x), t) batch_size = 20 from cntk.learners import sgd, learning_parameter_schedule lr = learning_parameter_schedule([.5 * (.1**i) for i in range(10000)], minibatch_size=batch_size, epoch_size=1000 * batch_size) learner = sgd(z.parameters, lr) trainer = C.Trainer(z(x), (y, acc), [learner]) for i in range(min(dataset_size, 100000) // batch_size): sample = X[batch_size * i:batch_size * (i + 1)] target = labels[batch_size * i:batch_size * (i + 1)] trainer.train_minibatch({x: sample, t: target}) loss = trainer.previous_minibatch_loss_average acc = trainer.previous_minibatch_evaluation_average print("cost {} - classification error {} - learning rate {}".format( loss, acc, learner.learning_rate())) y = C.argmax(z(x)) accuracy = 0
def __init__(self, input_shape, nb_actions,
             gamma=0.99, explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 1000000),
             learning_rate=0.00025, momentum=0.95, minibatch_size=32,
             memory_size=500000, train_after=200000, train_interval=4,
             target_update_interval=10000, monitor=True):
    """Build a DQN-style agent: action-value convnet, frozen target net,
    Huber-loss training criterion, Adam learner and replay memory.

    Args:
        input_shape: state tensor shape; index 0 is sliced off for the
            replay memory (presumably the frame/stack axis — TODO confirm).
        nb_actions: size of the discrete action space / output layer.
        gamma: discount factor used in the Q-value targets.
        explorer: epsilon-annealing exploration policy object.
        learning_rate, momentum: Adam hyper-parameters.
        minibatch_size, memory_size, train_after, train_interval,
        target_update_interval: stored for use by the training loop
            (defined elsewhere in the class).
        monitor: when True, attach a TensorBoard progress writer.
    """
    self.input_shape = input_shape
    self.nb_actions = nb_actions
    self.gamma = gamma

    self._train_after = train_after
    self._train_interval = train_interval
    self._target_update_interval = target_update_interval

    self._explorer = explorer
    self._minibatch_size = minibatch_size
    self._history = History(input_shape)
    # Replay memory stores single frames (input_shape[1:]) with a history
    # length of 4 — presumably matching a 4-frame state stack; confirm.
    self._memory = ReplayMemory(memory_size, input_shape[1:], 4)
    self._num_actions_taken = 0

    # Metrics accumulator
    self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

    # Action Value model (used by agent to interact with the environment)
    with default_options(activation=relu, init=he_uniform()):
        self._action_value_net = Sequential([
            Convolution2D((8, 8), 16, strides=4),
            Convolution2D((4, 4), 32, strides=2),
            Convolution2D((3, 3), 32, strides=1),
            Dense(256, init=he_uniform(scale=0.01)),
            Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))
        ])
    self._action_value_net.update_signature(Tensor[input_shape])

    # Target model used to compute the target Q-values in training, updated
    # less frequently for increased stability.
    self._target_net = self._action_value_net.clone(CloneMethod.freeze)

    # Function computing Q-values targets as part of the computation graph
    @Function
    @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
    def compute_q_targets(post_states, rewards, terminals):
        # element_select: where ``terminals`` is set use the bare reward,
        # otherwise reward + gamma * max Q of the frozen target net.
        return element_select(
            terminals, rewards,
            gamma * reduce_max(self._target_net(post_states), axis=0) + rewards,
        )

    # Define the loss, using Huber Loss (more robust to outliers)
    @Function
    @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
               post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
    def criterion(pre_states, actions, post_states, rewards, terminals):
        # Compute the q_targets
        q_targets = compute_q_targets(post_states, rewards, terminals)

        # actions is a 1-hot encoding of the action done by the agent
        q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

        # Define training criterion as the Huber Loss function
        return huber_loss(q_targets, q_acted, 1.0)

    # Adam based SGD
    lr_schedule = learning_parameter_schedule(learning_rate)
    m_schedule = momentum_schedule(momentum)
    vm_schedule = momentum_schedule(0.999)
    l_sgd = adam(self._action_value_net.parameters, lr_schedule,
                 momentum=m_schedule, variance_momentum=vm_schedule)

    self._metrics_writer = TensorBoardProgressWriter(freq=1, log_dir='metrics',
                                                     model=criterion) if monitor else None
    self._learner = l_sgd
    # Criterion doubles as both model and loss; no separate eval metric.
    self._trainer = Trainer(criterion, (criterion, None), l_sgd, self._metrics_writer)
def __init__(self, input_shape, nb_actions,
             gamma=0.99, explorer=ExpEpsilonAnnealingExplorer(1, 0.1, 1000000),
             learning_rate=0.0005, momentum=0.95, minibatch_size=128,
             memory_size=500000, train_after=256, train_interval=2,
             target_update_interval=10000, monitor=True):
    """Build a DQN-style agent with a dense (non-convolutional) Q-network,
    frozen target net, Huber-loss criterion, Adam learner and replay memory.

    Args:
        input_shape: state tensor shape, used directly as the replay-memory
            sample shape (no frame stacking in this variant).
        nb_actions: size of the discrete action space / output layer.
        gamma: discount factor used in the Q-value targets.
        explorer: exponential epsilon-annealing exploration policy.
        learning_rate, momentum: Adam hyper-parameters.
        minibatch_size, memory_size, train_after, train_interval,
        target_update_interval: stored for use by the training loop
            (defined elsewhere in the class).
        monitor: when True, attach a TensorBoard writer under a timestamped
            'metrics/<timestamp>' directory.
    """
    self.input_shape = input_shape
    self.nb_actions = nb_actions
    self.gamma = gamma

    self._train_after = train_after
    self._train_interval = train_interval
    self._target_update_interval = target_update_interval

    self._explorer = explorer
    self._minibatch_size = minibatch_size
    self._memory = ReplayMemory(memory_size, input_shape)
    self._num_actions_taken = 0

    # Metrics accumulator
    self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

    # Action Value model (used by agent to interact with the environment)
    with default_options(activation=relu, init=he_uniform()):
        self._action_value_net = Sequential([
            # Convolution2D((8, 8), 16, strides=4),
            # Convolution2D((4, 4), 32, strides=2),
            # Convolution2D((3, 3), 32, strides=1),
            Dense(128, init=he_uniform()),
            Dense(128, init=he_uniform()),
            Dense(nb_actions, activation=None, init=he_uniform())
        ])
    self._action_value_net.update_signature(Tensor[input_shape])

    # Target model used to compute the target Q-values in training, updated
    # less frequently for increased stability.
    self._target_net = self._action_value_net.clone(CloneMethod.freeze)

    # Function computing Q-values targets as part of the computation graph
    @Function
    @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
    def compute_q_targets(post_states, rewards, terminals):
        # element_select: where ``terminals`` is set use the bare reward,
        # otherwise reward + gamma * max Q of the frozen target net.
        return element_select(
            terminals, rewards,
            gamma * reduce_max(self._target_net(post_states), axis=0) + rewards,
        )

    # Define the loss, using Huber Loss (more robust to outliers)
    @Function
    @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
               post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
    def criterion(pre_states, actions, post_states, rewards, terminals):
        # Compute the q_targets
        q_targets = compute_q_targets(post_states, rewards, terminals)

        # actions is a 1-hot encoding of the action done by the agent
        q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

        # Define training criterion as the Huber Loss function
        return huber_loss(q_targets, q_acted, 1.0)

    # Adam based SGD
    lr_schedule = learning_parameter_schedule(learning_rate)
    m_schedule = momentum_schedule(momentum)
    vm_schedule = momentum_schedule(0.999)
    l_sgd = adam(self._action_value_net.parameters, lr_schedule,
                 momentum=m_schedule, variance_momentum=vm_schedule)

    # One TensorBoard log directory per run, keyed by start timestamp.
    log_dir = 'metrics/' + datetime.now().strftime('%Y%m%d%H%M%S')
    self._metrics_writer = TensorBoardProgressWriter(
        freq=1, log_dir=log_dir, model=criterion) if monitor else None
    self._learner = l_sgd
    # Criterion doubles as both model and loss; no separate eval metric.
    self._trainer = Trainer(criterion, (criterion, None), l_sgd, self._metrics_writer)
def test_learner_init_legacy():
    """Back-compatibility checks for the deprecated UnitType-based learner API.

    Exercises ``learning_rate_schedule`` with ``UnitType.sample`` /
    ``UnitType.minibatch``, ``reset_learning_rate``, the
    ``minibatch_size=C.learners.IGNORE`` override, and the legacy
    positional/keyword call forms of the momentum-based learners.
    """
    i = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ))

    res = i * w

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # for backcompatibility test: in reset_learning_rate the learner won't receive
    # the reference minibatch size from the schedule; the user needs to specify it explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in future version: this is a logically invalid
    # combination but it was the only way to use mean gradient and set learning rate in the past.
    # (FIX: the second half of this sentence was previously bare, uncommented
    # text, which is a SyntaxError.)
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample),
                  use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    # test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per minibatch instruction for the learner
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # for backcompatibility test (repeated deliberately after the overrides above):
    # in reset_learning_rate the learner won't receive the reference minibatch size
    # from the schedule; the user needs to specify it explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # back compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], unit=UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant,
                unit_gain=unit_gain_value)

    # rmsprop hyperparameters (renamed from max/min to avoid shadowing builtins)
    gamma, inc, dec, rms_max, rms_min = 0.5, 1.2, 0.7, 10, 1e-8
    lr_per_sample = learning_rate_schedule([0.1, 0.2], unit=UnitType.sample, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, rms_max, rms_min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
((0.2, 0), [0.2], 0),
    ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
    (([0.2,0.4], 0, 5), [0.2]*5+[0.4]*20, 0),
    (([(3,0.2),(2,0.4),(1,0.8)], 0, 5), [0.2]*15+[0.4]*10+[0.8]*20, 0),
]

# (constructor args, expected value per sweep index) cases for momentum_schedule:
# plain scalar, scalar queried repeatedly, value list with epoch_size, and
# (count, value) pairs with epoch_size.
MOMENTUM_SCHEDULE_PARAMS = [
        ((0.2,), [0.2]),
        ((0.2,), [0.2, 0.2, 0.2, 0.2]),
        (([0.2,0.4], 5), [0.2]*5+[0.4]*20),
        (([(3,0.2),(2,0.4),(1,0.8)], 5), [0.2]*15+[0.4]*10+[0.8]*20),
        ]

# One factory per learner type, each with minimal required arguments;
# presumably iterated by a parametrized test elsewhere in this file — TODO confirm.
LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
    lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
    lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    # Deprecated UnitType-based schedule: check the reference minibatch size
    # and the value returned for each sample/sweep index.
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs,
                       minibatch_size, model_dir=None, log_dir=None, tensorboard_logdir=None,
                       gen_heartbeat=False, fp16=False):
    """Train a CIFAR-10 model and evaluate it on the test set.

    :param reader_train: minibatch source for training data
    :param reader_test: minibatch source for test data
    :param network_name: model name, used for checkpoint file names
    :param epoch_size: number of samples per epoch
    :param max_epochs: number of epochs to train
    :param minibatch_size: training minibatch size
    :param model_dir: directory for per-epoch checkpoints (skipped if None)
    :param log_dir: NOTE(review): accepted but never read in this body — confirm callers
    :param tensorboard_logdir: if set, write TensorBoard progress/parameter logs there
    :param gen_heartbeat: forwarded to ProgressPrinter
    :param fp16: NOTE(review): accepted but never read — dtype is fixed to float32 below
    :return: (accuracy, elapsed_seconds), accuracy = 1 - test error rate
    """
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    with C.default_options(dtype=np.float32):
        # create model, and configure learning parameters
        model = create_cifar10_model(input_var, 3, num_classes)

        # loss and metric
        loss = cross_entropy_with_softmax(model, label_var)
        error_rate = classification_error(model, label_var)

    # Shared training parameters: piecewise-constant learning rate, dropping
    # at each checkpoint epoch.
    check_point = [80, 120, 160, 180]
    lrs = [3e-2, 3e-3, 3e-4, 3e-4, 5e-5]

    def _lr_for_epoch(epoch):
        # First checkpoint boundary the epoch falls below selects the rate;
        # epochs at or past the last boundary get the final rate.
        # (FIX: the previous if-chain appended nothing for epoch ==
        # check_point[3], shifting every later epoch's learning rate.)
        for boundary, rate in zip(check_point, lrs):
            if epoch < boundary:
                return rate
        return lrs[-1]

    lr_per_minibatch = [_lr_for_epoch(i) for i in range(max_epochs + 1)]
    lr_schedule = learning_parameter_schedule(lr_per_minibatch,
                                              minibatch_size=minibatch_size,
                                              epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size)  # momentum

    # progress writers
    progress_writers = [
        ProgressPrinter(tag='Training', num_epochs=max_epochs, gen_heartbeat=gen_heartbeat)
    ]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboard_logdir, model=model)
        progress_writers.append(tensorboard_writer)

    # trainer object
    l2_reg_weight = 0.0001
    learner = adam(model.parameters, lr_schedule, mm_schedule,
                   l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(model, (loss, error_rate), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(model)
    print("*********Training Start*********")
    # FIX: time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended monotonic replacement for elapsed-time measurement.
    start = time.perf_counter()
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the
        # parameters change indeed.
        if tensorboard_writer:
            for parameter in model.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               reduce_mean(parameter).eval(),
                                               epoch)
        if model_dir:
            model.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler()  # begin to collect profiler data after first epoch

    # Evaluation parameters
    test_epoch_size = 10000
    minibatch_size = 32

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")
    elapsed = time.perf_counter() - start
    return 1 - metric_numer / metric_denom, elapsed