def create_bn_inception():
    """Build the BN-Inception network together with its loss and metrics.

    Returns:
        dict holding the input variables ('feature', 'label'), the
        cross-entropy loss ('ce'), the classification error ('pe'), the
        classification error computed with topN=5 ('pe5') and the network
        output ('output').
    """
    # Time constant handed to the model builder.
    bn_time_const = 4096

    # Input variables denoting the features and label data
    features = input_variable((NUM_CHANNELS, IMAGE_HEIGHT, IMAGE_WIDTH))
    labels = input_variable(NUM_CLASSES)

    network = bn_inception_cifar_model(features, NUM_CLASSES, bn_time_const)

    # loss and metrics
    loss = cross_entropy_with_softmax(network, labels)
    error = classification_error(network, labels)
    error_top5 = classification_error(network, labels, topN=5)

    log_number_of_parameters(network)
    print()

    return {
        'feature': features,
        'label': labels,
        'ce': loss,
        'pe': error,
        'pe5': error_top5,
        'output': network,
    }
def train_lm(testing=False):
    """Train the language model and return the last measured cross entropy.

    Args:
        testing: when true, the per-epoch model files are not written.

    Returns:
        The average cross entropy from the most recent progress report
        (0 if no report was ever produced).
    """
    reader = DataReader(token_to_id_path, segment_sepparator)

    # Model nodes for the source and target inputs.
    input_sequence, label_sequence = create_inputs(reader.vocab_dim)

    # create_model yields three output nodes:
    #   z             - the input to softmax (latent representation of the next token)
    #   cross_entropy - the training criterion
    #   error         - binary indicator of whether the correct token was predicted
    z, cross_entropy, error = create_model(
        input_sequence, label_sequence, reader.vocab_dim, hidden_dim)

    # Measurements use the built-in full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # Print out some useful training information.
    log_number_of_parameters(z)
    print()

    # Counters driving the progress reports.
    total_trained = 0
    trained_since_report = 0

    # Learner and trainer that drive the model training.
    learner = momentum_sgd(
        z.parameters,
        C.learning_parameter_schedule_per_sample(learning_rate),
        C.momentum_schedule_per_sample(momentum_per_sample),
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(z, (cross_entropy, error), learner)

    last_avg_ce = 0
    for epoch_count in range(num_epochs):
        batches = reader.minibatch_generator(
            train_file_path, sequence_length, sequences_per_batch)
        for features, labels, token_count in batches:
            arguments = {input_sequence: features, label_sequence: labels}

            # Time only the training step itself.
            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end = timeit.default_timer()
            samples_per_second = token_count / (t_end - t_start)

            # Report on the very first minibatch and then every
            # num_samples_between_progress_report samples.
            report_due = (trained_since_report >= num_samples_between_progress_report
                          or total_trained == 0)
            if report_due:
                av_ce = average_cross_entropy(
                    full_ce, input_sequence, label_sequence, reader)
                print_progress(samples_per_second, av_ce, total_trained, t_start)
                trained_since_report = 0
                last_avg_ce = av_ce

            total_trained += token_count
            trained_since_report += token_count

        if not testing:
            # After each epoch save the model.
            model_filename = "models/lm_epoch%d.dnn" % epoch_count
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

    return last_avg_ce
def train_lm(training_file, epochs, max_num_minibatches):
    """Train the character-level model and save it after every epoch.

    Args:
        training_file: path handed to ``load_data_and_vocab``.
        epochs: number of epochs to run.
        max_num_minibatches: budget of minibatches across all epochs.
    """
    # Load the data and vocab.
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file)

    # Source and target inputs to the model.
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # Create the model and apply it to the input sequence.
    z = create_model(vocab_dim)(input_sequence)

    # Criteria: loss and metric.
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Learner and trainer that drive the model training.
    learner = momentum_sgd(
        z.parameters,
        learning_parameter_schedule_per_sample(0.001),
        momentum_schedule_per_sample(0.9990913221888589),
        gradient_clipping_threshold_per_sample=5.0,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(z, (ce, errs), learner, ProgressPrinter(freq=100, tag='Training'))

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size,
                                max_num_minibatches // epochs)

    # Print out some useful training information.
    log_number_of_parameters(z)
    print ("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))
    print()

    for epoch_index in range(epochs):
        for batch_index in range(minibatches_per_epoch):
            # Get the data.
            features, labels = get_data(batch_index, minibatch_size, data, char_to_ix, vocab_dim)

            # The second element flags the start of a new sequence: True only
            # for the first minibatch of each epoch.
            is_new_sequence = [batch_index == 0]
            arguments = ({input_sequence: features, label_sequence: labels}, is_new_sequence)
            trainer.train_minibatch(arguments)

            # Periodically print a sample generated by the model.
            global_minibatch = epoch_index * minibatches_per_epoch + batch_index
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (epoch_index + 1)
        z.save(model_filename)
        print("Saved model to '%s'" % model_filename)
def finalize_network(reader, model_details, max_amount_of_epochs, samples_per_epoch,
                     samples_per_minibatch, pixel_dimensions, classes, learning_rate):
    """Train the network built by ``create_tf_model``, save it and return its nodes.

    Returns:
        dict with the 'features' and 'label' input variables and 'model',
        the softmax applied to the trained network.
    """
    num_classes = len(classes)
    image_shape = (pixel_dimensions['depth'],
                   pixel_dimensions['height'],
                   pixel_dimensions['width'])

    features = input_variable(shape=image_shape)
    label = input_variable(shape=num_classes)

    # Scaling the input speeds up training.
    normalized_features = element_times(1.0 / 256.0, features)

    model = create_tf_model(model_details,
                            num_classes=num_classes,
                            input_features=normalized_features,
                            freeze=True)

    loss = cross_entropy_with_softmax(model, label)
    metric = classification_error(model, label)

    learner = momentum_sgd(
        parameters=model.parameters,
        lr=learning_rate_schedule(learning_rate, UnitType.minibatch),
        momentum=0.9,
        l2_regularization_weight=0.0005)
    reporter = ProgressPrinter(tag='training', num_epochs=max_amount_of_epochs)
    trainer = Trainer(model=model,
                      criterion=(loss, metric),
                      parameter_learners=[learner],
                      progress_writers=[reporter])

    log_number_of_parameters(model)

    # Mapping from network inputs to the reader's streams.
    stream_map = {
        features: reader.streams.features,
        label: reader.streams.labels,
    }

    # Checkpoint once per samples_per_epoch samples, with restore enabled.
    checkpoints = CheckpointConfig(
        frequency=samples_per_epoch,
        filename=os.path.join("./checkpoints", "ConvNet_Lego_VisiOn"),
        restore=True)
    session = training_session(trainer=trainer,
                               mb_source=reader,
                               model_inputs_to_streams=stream_map,
                               mb_size=samples_per_minibatch,
                               progress_frequency=samples_per_epoch,
                               checkpoint_config=checkpoints)
    session.train()

    network = {'features': features, 'label': label, 'model': softmax(model)}

    # Save the trained model (without the softmax) under a fixed file name.
    export_path = os.path.abspath(
        os.path.join("..", "..", "Final models", "CNN", "CNN-3200-224-resnet-18.model"))
    model.save(export_path)

    return network
def main():
    """Run one training round per entry of ``opt.epochs``, then finalize."""
    prepare_dir()  # create the vocab dir and model dir

    network = create_model(vocab_sqrt)
    # Optionally start from a previously saved model.
    if opt.pre_model:
        network['model'].restore(opt.pre_model)
    log_number_of_parameters(network['model'])

    num_rounds = len(opt.epochs)
    location_path = os.path.join(opt.vocabdir, opt.alloc_file)
    for round_index in range(num_rounds):
        train(network, location_path, round_index)
        # The following round uses the location path of round_index + 1.
        location_path = get_k_round_location_path(round_index + 1)

    Communicator.finalize()
def train_fast_rcnn(debug_output=False):
    """Train the Fast R-CNN model and return its output node.

    Args:
        debug_output: when true, the network graph is plotted and the model
            is saved after every epoch under ``<abs_path>/Output``.

    Returns:
        The trained ``frcn_predictor`` output.
    """
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = input((num_channels, image_height, image_width))
    roi_input = input((num_rois, 4))
    label_input = input((num_rois, num_classes))

    # Mapping from network inputs to reader streams
    streams = minibatch_source.streams
    input_map = {
        image_input: streams.features,
        roi_input: streams.rois,
        label_input: streams.roiLabels,
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Learning parameters: per-sample learning rate in three stages
    l2_reg_weight = 0.0005
    lr_per_sample = [1e-5] * 10 + [1e-6] * 5 + [1e-7]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):
        samples_seen = 0
        while samples_seen < epoch_size:
            # Never request more samples than remain in the epoch.
            batch_size = min(mb_size, epoch_size - samples_seen)
            data = minibatch_source.next_minibatch(batch_size, input_map=input_map)
            trainer.train_minibatch(data)
            samples_seen += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))

    return frcn_output
def train_fast_rcnn(debug_output=False, model_path=model_file):
    """Train the Fast R-CNN model and return its output node.

    Args:
        debug_output: when true, the network graph is plotted and the model
            is saved after every epoch under ``<abs_path>/Output``.
        model_path: forwarded to ``frcn_predictor``.

    Returns:
        The trained ``frcn_predictor`` output.
    """
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = C.input_variable((num_channels, image_height, image_width))
    roi_input = C.input_variable((num_rois, 4))
    label_input = C.input_variable((num_rois, num_classes))

    # Mapping from network inputs to reader streams
    streams = minibatch_source.streams
    input_map = {
        image_input: streams.features,
        roi_input: streams.rois,
        label_input: streams.roiLabels,
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes, model_path)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Learning parameters: per-sample learning rate in three stages
    l2_reg_weight = 0.0005
    lr_per_sample = [1e-5] * 10 + [1e-6] * 5 + [1e-7]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):
        samples_seen = 0
        while samples_seen < epoch_size:
            # Never request more samples than remain in the epoch.
            batch_size = min(mb_size, epoch_size - samples_seen)
            data = minibatch_source.next_minibatch(batch_size, input_map=input_map)
            trainer.train_minibatch(data)
            samples_seen += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))

    return frcn_output
def main():
    """Run one training round per entry of ``opt.epochs``, then finalize."""
    prepare_dir()  # create the vocab dir and model dir

    network = create_model(vocab_sqrt)
    # Optionally start from a previously saved model.
    if opt.pre_model:
        network['model'].restore(opt.pre_model)
    log_number_of_parameters(network['model'])

    num_rounds = len(opt.epochs)
    location_path = os.path.join(opt.vocabdir, opt.alloc_file)
    for round_index in range(num_rounds):
        train(network, location_path, round_index)
        # The following round uses the location path of round_index + 1.
        location_path = get_k_round_location_path(round_index + 1)

    Communicator.finalize()
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    """Train the transfer-learning model and return it.

    Args:
        base_model_file, feature_node_name, last_hidden_node_name, freeze:
            forwarded to ``create_model``.
        image_width, image_height, num_channels, num_classes: dimensions of
            the image and label inputs.
        train_map_file: text file whose line count defines the epoch size.
        num_epochs: number of epochs to train.
        max_images: when positive, caps the epoch size.

    Returns:
        The trained model produced by ``create_model``.
    """
    # Count the lines of the map file; the context manager guarantees the
    # file handle is closed instead of being left to the garbage collector.
    with open(train_map_file) as map_file:
        epoch_size = sum(1 for _ in map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height,
                                        num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name,
                            num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            # Never request more samples than remain in the epoch.
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    """Train the transfer-learning model and return it.

    Args:
        base_model_file, feature_node_name, last_hidden_node_name, freeze:
            forwarded to ``create_model``.
        image_width, image_height, num_channels, num_classes: dimensions of
            the image and label inputs.
        train_map_file: text file whose line count defines the epoch size.
        num_epochs: number of epochs to train.
        max_images: when positive, caps the epoch size.

    Returns:
        The trained model produced by ``create_model``.
    """
    # Count the lines of the map file; the context manager guarantees the
    # file handle is closed instead of being left to the garbage collector.
    with open(train_map_file) as map_file:
        epoch_size = sum(1 for _ in map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height,
                                        num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name,
                            num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            # Never request more samples than remain in the epoch.
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print ("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model
def train_fast_rcnn(debug_output=False, model_path=model_file):
    """Train the Fast R-CNN model with a data-parallel learner and return it.

    Args:
        debug_output: when true, the network graph is plotted and the model
            is saved after every epoch under ``<abs_path>/Output``.
        model_path: forwarded to ``frcn_predictor``.

    Returns:
        The trained ``frcn_predictor`` output.
    """
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = C.input_variable((num_channels, image_height, image_width))
    roi_input = C.input_variable((num_rois, 4))
    label_input = C.input_variable((num_rois, num_classes))

    # Mapping from network inputs to reader streams
    streams = minibatch_source.streams
    input_map = {
        image_input: streams.features,
        roi_input: streams.rois,
        label_input: streams.roiLabels,
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes, model_path)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Learning parameters: per-sample learning rate in three stages
    l2_reg_weight = 0.0005
    lr_per_sample = [1e-5] * 10 + [1e-6] * 5 + [1e-7]
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
    mm_schedule = momentum_schedule_per_sample(momentum_per_sample)

    # Plain learner first, then wrapped for distributed training.
    local_learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)
    learner = distributed.data_parallel_distributed_learner(
        learner=local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=warm_up)

    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs,
                                       rank=distributed.Communicator.rank())
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):
        samples_seen = 0
        while samples_seen < epoch_size:
            # Request mb_size samples per worker, capped at what remains of
            # the epoch; this worker reads the partition matching its rank.
            batch_size = min(mb_size * C.Communicator.num_workers(),
                             epoch_size - samples_seen)
            data = minibatch_source.next_minibatch(
                batch_size,
                input_map=input_map,
                num_data_partitions=C.Communicator.num_workers(),
                partition_index=C.Communicator.rank())
            trainer.train_minibatch(data)
            samples_seen += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(
                os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch + 1)))

    if distributed_flg:
        distributed.Communicator.finalize()

    return frcn_output
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    """Train the sequence-to-sequence model, saving it before every epoch and at the end.

    Every ``eval_freq`` minibatches one validation sequence is decoded with
    the greedy decoder and printed.
    """
    # Note: We would like to set the signature of 's2smodel'
    # (s2smodel.update_signature()), but that would cause an error since the
    # training criterion uses a reduced sequence axis for the labels (it
    # removes the initial <s> symbol). Hence the model is left with
    # unspecified input shapes and axes.

    # Training wrapper for the s2smodel and the criterion function.
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # A greedy decoder lets us log progress on a validation example; it is
    # not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # Learner and trainer that drive the model training.
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005   # TODO: can we use the same value for both?
    lr_schedule = learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                         UnitType.sample, epoch_size)
    learner = adam_sgd(model_train.parameters,
                       lr=lr_schedule,
                       momentum=momentum_as_time_constant_schedule(1100),
                       gradient_clipping_threshold_per_sample=2.3,
                       gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Bookkeeping for the training loop.
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # Print out some useful training information.
    log_number_of_parameters(model_train)
    print()
    # Pass log_to_file=model_path_stem + ".log" as well to log to a file.
    progress_printer = ProgressPrinter(freq=30, tag='Training')

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        # total_samples accumulates across epochs, so the bound grows with the epoch.
        while total_samples < (epoch + 1) * epoch_size:
            # Get the next minibatch of training data.
            mb_train = train_reader.next_minibatch(minibatch_size)
            train_features = mb_train[train_reader.streams.features]
            train_labels = mb_train[train_reader.streams.labels]
            trainer.train_minibatch({
                criterion.arguments[0]: train_features,
                criterion.arguments[1]: train_labels,
            })

            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

            # Every eval_freq minibatches decode one validation sequence to
            # show visually how training is going.
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # Evaluate the decoder output model (i.e. without the ground truth).
                e = model_greedy(mb_valid[valid_reader.streams.features])
                source_dense = sparse_to_dense(mb_valid[valid_reader.streams.features])
                print(format_sequences(source_dense, i2w))
                print("->")
                print(format_sequences(e, i2w))

                # Debugging attention
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # Log a summary of the stats for the epoch.
        progress_printer.epoch_summary(with_metric=True)

    # Done: save the final model.
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
def train_model(base_model_file, train_map_file, test_map_file, input_resolution,
                num_epochs, mb_size, max_train_images, lr_per_mb, momentum_per_mb,
                l2_reg_weight, dropout_rate, freeze_weights, num_channels=3):
    """Train the transfer-learning model, tracking train/test error per epoch.

    After every epoch the training and test errors are appended to their
    histories and both curves are re-plotted.

    Returns:
        The trained model produced by ``create_model``.
    """
    # Square input images.
    image_width = input_resolution
    image_height = input_resolution

    # Epoch sizes come from the number of rows in the map files.
    epoch_size_test = len(readTable(test_map_file))
    epoch_size_train = min(len(readTable(train_map_file)), max_train_images)
    # Column 1 of the training map is converted to integers; the class count
    # is its largest value plus one.
    num_classes = max(toIntegers(getColumn(readTable(train_map_file), 1))) + 1

    # Create the minibatch sources
    minibatch_source_train = create_mb_source(train_map_file, image_width, image_height,
                                              num_channels, num_classes, True)
    minibatch_source_test = create_mb_source(test_map_file, image_width, image_height,
                                             num_channels, num_classes, False)

    # Define mapping from reader streams to network inputs
    label_input = input_variable(num_classes)
    image_input = input_variable((num_channels, image_height, image_width), name="input")
    input_map = {
        image_input: minibatch_source_train['features'],
        label_input: minibatch_source_train['labels'],
    }

    # Instantiate the transfer learning model and loss function
    cntkModel = create_model(base_model_file, image_input, num_classes,
                             dropout_rate, freeze_weights)
    ce = cross_entropy_with_softmax(cntkModel, label_input)
    pe = classification_error(cntkModel, label_input)

    # Instantiate the trainer object
    learner = momentum_sgd(cntkModel.parameters,
                           learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch),
                           momentum_schedule(momentum_per_mb),
                           l2_regularization_weight=l2_reg_weight)
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=num_epochs)]
    trainer = Trainer(cntkModel, (ce, pe), learner, progress_writers)

    # Run training epochs
    print("Training transfer learning model for {0} epochs (epoch_size_train = {1}).".format(num_epochs, epoch_size_train))
    errsTest = []
    errsTrain = []
    log_number_of_parameters(cntkModel)

    for epoch in range(num_epochs):
        err_numer = 0
        sample_counts = 0
        while sample_counts < epoch_size_train:
            # Never request more samples than remain in the epoch.
            sample_count = min(mb_size, epoch_size_train - sample_counts)
            data = minibatch_source_train.next_minibatch(sample_count, input_map=input_map)
            trainer.train_minibatch(data)
            sample_counts += sample_count
            # Weight the minibatch error by the number of samples requested.
            err_numer += trainer.previous_minibatch_evaluation_average * sample_count

            if sample_counts % (100 * mb_size) == 0:
                print("Training: processed {0} samples".format(sample_counts))

        # Compute accuracy on training and test sets
        errsTrain.append(err_numer / float(sample_counts))
        trainer.summarize_training_progress()
        test_error = cntkComputeTestError(trainer, minibatch_source_test, mb_size,
                                          epoch_size_test, input_map)
        errsTest.append(test_error)
        trainer.summarize_test_progress()

        # Plot training progress
        plt.plot(errsTrain, 'b-', errsTest, 'g-')
        plt.xlabel('Epoch number')
        plt.ylabel('Error')
        plt.title('Training error (blue), test error (green)')
        plt.draw()

    return cntkModel
errs = classification_error(z, label_sequence) lr_per_sample = learning_rate_schedule(0.001, UnitType.sample) momentum_time_constant = momentum_as_time_constant_schedule(1100) clipping_threshold_per_sample = 5.0 gradient_clipping_with_truncation = True learner = momentum_sgd( z.parameters, lr_per_sample, momentum_time_constant, gradient_clipping_threshold_per_sample=clipping_threshold_per_sample, gradient_clipping_with_truncation=gradient_clipping_with_truncation) progress_printer = ProgressPrinter(freq=100, tag='Training') trainer = Trainer(z, (ce, errs), learner, progress_printer) log_number_of_parameters(z) def sample(net, prime_text='', use_hardmax=True, length=100, temperature=1.0): # Применяем температуру: T < 1 - сглаживание; T=1.0 - без изменений; T > 1 - выделение пиков def apply_temp(p): p = np.power(p, (temperature)) # повторно нормализуем return (p / np.sum(p)) def sample_word(p): if use_hardmax: w = np.argmax(p, axis=2)[0, 0] else: # выбираем случайным образом исходя из вероятностей
def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    """Train ``loss`` with separate learners for bias and non-bias parameters.

    Bias parameters (name equal to 'b' or containing '.b') use the learning
    rates of ``lr_per_sample`` multiplied by ``cfg["CNTK"].BIAS_LR_MULT``.
    When ``buffered_rpn_proposals`` is given, the proposals returned by the
    minibatch source are fed through ``rpn_rois_input``.
    """
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    # Split the parameters into biases and everything else.
    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if p not in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("biases")
        for p in biases:
            print(p)
        print("others")
        for p in others:
            print(p)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    learner = momentum_sgd(others, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight,
                           unit_gain=False,
                           use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)

    bias_lr_per_sample = [rate * bias_lr_mult for rate in lr_per_sample]
    bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule,
                                l2_regularization_weight=l2_reg_weight,
                                unit_gain=False,
                                use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'],
        globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width,
        pad_height=image_height,
        pad_value=img_pad_value,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["CNTK"].NUM_TRAIN_IMAGES,
        buffered_rpn_proposals=buffered_rpn_proposals)

    # Define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input,
    }

    use_buffered_proposals = buffered_rpn_proposals is not None
    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train,
                                       gen_heartbeat=True)

    for epoch in range(epochs_to_train):
        sample_count = 0
        while sample_count < epoch_size:
            # Never request more samples than remain in the epoch.
            data, proposals = od_minibatch_source.next_minibatch_with_proposals(
                min(mb_size, epoch_size - sample_count), input_map=input_map)

            if use_buffered_proposals:
                proposal_value = Value(batch=np.asarray(proposals, dtype=np.float32))
                data[rpn_rois_input] = MinibatchData(proposal_value, 1, 1, False)
                # Remove the dims input if no rpn is required, to avoid
                # warnings. The entry is located by '[6]' in the key's string form.
                dims_keys = [k for k in data if '[6]' in str(k)]
                del data[dims_keys[0]]

            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            if sample_count % 100 == 0:
                print("Processed {} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)
def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train, cfg,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    """Train ``loss`` with separate learners for bias and non-bias parameters.

    Bias parameters (name equal to 'b' or containing '.b') use the learning
    rates of ``lr_per_sample`` multiplied by ``cfg["CNTK"].BIAS_LR_MULT``.
    When ``buffered_rpn_proposals`` is given, the minibatch source's proposals
    stream is mapped to ``rpn_rois_input``; otherwise its dims stream is
    mapped to ``dims_input``.
    """
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    # Split the parameters into biases and everything else.
    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if p not in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("biases")
        for p in biases:
            print(p)
        print("others")
        for p in others:
            print(p)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
    learner = momentum_sgd(others, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight,
                           unit_gain=False,
                           use_mean_gradient=True)

    bias_lr_per_sample = [rate * bias_lr_mult for rate in lr_per_sample]
    bias_lr_schedule = learning_parameter_schedule_per_sample(bias_lr_per_sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule,
                                l2_regularization_weight=l2_reg_weight,
                                unit_gain=False,
                                use_mean_gradient=True)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source; buffered proposals, when supplied, are
    # wrapped in a provider.
    use_buffered_proposals = buffered_rpn_proposals is not None
    proposal_provider = (
        ProposalProvider.fromlist(buffered_rpn_proposals, requires_scaling=False)
        if use_buffered_proposals else None)

    od_minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE,
        cfg["DATA"].TRAIN_ROI_FILE,
        num_classes=cfg["DATA"].NUM_CLASSES,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["DATA"].NUM_TRAIN_IMAGES,
        proposal_provider=proposal_provider)

    # Define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
    }
    if use_buffered_proposals:
        input_map[od_minibatch_source.proposals_si] = rpn_rois_input
    else:
        input_map[od_minibatch_source.dims_si] = dims_input

    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train,
                                       gen_heartbeat=True)

    for epoch in range(epochs_to_train):
        sample_count = 0
        while sample_count < cfg["DATA"].NUM_TRAIN_IMAGES:
            # Never request more samples than remain in the epoch.
            remaining = cfg["DATA"].NUM_TRAIN_IMAGES - sample_count
            data = od_minibatch_source.next_minibatch(min(cfg.MB_SIZE, remaining),
                                                      input_map=input_map)

            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            if sample_count % 100 == 0:
                print("Processed {} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)
def train_and_evaluate(reader_train, reader_test, max_epochs, model_func):
    """Train a classifier, evaluate it on the test reader and plot the run.

    The model is built by ``model_func`` on the input scaled by 1/256 and
    trained with momentum SGD for up to ``max_epochs`` epochs of 20000
    samples. Training stops early as soon as a minibatch reports an
    evaluation average below 0.025. Afterwards 6600 test samples are
    evaluated, the aggregate error is printed, and moving averages of the
    training loss and error are plotted.

    Args:
        reader_train: Minibatch source for training; must expose
            ``streams.features`` and ``streams.labels``.
        reader_test: Minibatch source for evaluation with the same streams.
        max_epochs: Upper bound on the number of training epochs.
        model_func: Callable invoked as ``model_func(input, out_dims=...)``
            that returns the network output.

    Returns:
        ``softmax`` applied to the trained network output.
    """
    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # Normalize the input and apply the model to it
    feature_scale = 1.0 / 256.0
    input_var_norm = element_times(feature_scale, input_var)
    z = model_func(input_var_norm, out_dims=num_classes)

    #
    # Training action
    #

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 20000
    minibatch_size = 64

    # Set training parameters
    lr_per_minibatch = learning_rate_schedule(
        [0.01] * 10 + [0.003] * 10 + [0.001], UnitType.minibatch, epoch_size)
    momentum_time_constant = momentum_as_time_constant_schedule(
        -minibatch_size / np.log(0.9))
    l2_reg_weight = 0.001

    # trainer object
    progress_printer = ProgressPrinter(0)
    learner = momentum_sgd(z.parameters,
                           lr=lr_per_minibatch,
                           momentum=momentum_time_constant,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), [learner], [progress_printer])

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels,
    }

    log_number_of_parameters(z)
    print()

    # perform model training
    stop_run = False
    batch_index = 0
    plot_data = {'batchindex': [], 'loss': [], 'error': []}
    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += data[label_var].num_samples

            # For visualization...
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(trainer.previous_minibatch_loss_average)
            plot_data['error'].append(trainer.previous_minibatch_evaluation_average)

            progress_printer.update_with_trainer(trainer, with_metric=True)
            batch_index += 1
            # Early stop: one sufficiently good minibatch ends the whole run.
            if trainer.previous_minibatch_evaluation_average < 0.025:
                stop_run = True
                break
        if stop_run:
            break
        progress_printer.epoch_summary(with_metric=True)

    #
    # Evaluation action
    #
    epoch_size = 6600
    minibatch_size = 32

    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels,
    }

    while sample_count < epoch_size:
        requested = min(minibatch_size, epoch_size - sample_count)
        data = reader_test.next_minibatch(requested, input_map=input_map)

        # Weight the per-minibatch average by the number of samples the
        # reader actually delivered, which may be fewer than requested.
        delivered = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * delivered
        metric_denom += delivered

        sample_count += delivered
        minibatch_index += 1

    # minibatch_index already equals the number of evaluated minibatches.
    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(
        minibatch_index, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    # Visualize training result: moving average over window_width minibatches,
    # computed from cumulative sums.
    window_width = 32
    loss_cumsum = np.cumsum(np.insert(plot_data['loss'], 0, 0))
    error_cumsum = np.cumsum(np.insert(plot_data['error'], 0, 0))

    plot_data['batchindex'] = np.insert(plot_data['batchindex'], 0, 0)[window_width:]
    plot_data['avg_loss'] = (
        loss_cumsum[window_width:] - loss_cumsum[:-window_width]) / window_width
    plot_data['avg_error'] = (
        error_cumsum[window_width:] - error_cumsum[:-window_width]) / window_width

    plt.figure(1)
    plt.subplot(211)
    plt.plot(plot_data["batchindex"], plot_data["avg_loss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss ')
    plt.show()

    plt.subplot(212)
    plt.plot(plot_data["batchindex"], plot_data["avg_error"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error ')
    plt.show()

    return softmax(z)
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    """Train ``s2smodel`` and periodically print a greedy decode of a sample.

    The model is saved at the start of every epoch and once more after the
    last epoch. Every 100 minibatches one validation sequence is decoded with
    the greedy model and printed next to its input.

    The signature of ``s2smodel`` is deliberately left unspecified: the
    training criterion uses a reduced sequence axis for the labels (the
    initial <s> symbol is removed), so fixing input shapes and axes would
    cause an error.

    Args:
        train_reader: Minibatch source with ``streams.features`` and
            ``streams.labels`` used for training.
        valid_reader: Minibatch source with ``streams.features`` used for the
            periodic decode.
        vocab: Not used by this function.
        i2w: Passed to ``format_sequences`` when printing sequences.
        s2smodel: The sequence-to-sequence model to train and save.
        max_epochs: Number of epochs to train.
        epoch_size: Number of label samples that make up one epoch.
    """
    # Training wrapper and criterion, plus a greedy decoder that is used for
    # progress output only and never for the actual training.
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)
    model_greedy = create_model_greedy(s2smodel)

    minibatch_size = 72
    if use_attention:
        base_lr = 0.001
    else:
        base_lr = 0.005  # TODO: can we use the same value for both?
    lr_steps = [base_lr] * 2 + [base_lr / 2] * 3 + [base_lr / 4]
    learner = fsadagrad(
        model_train.parameters,
        lr=learning_rate_schedule(lr_steps, UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    total_samples = 0
    minibatches_done = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        # total_samples is cumulative, so the epoch ends at a running target.
        epoch_end = (epoch + 1) * epoch_size
        while total_samples < epoch_end:
            mb_train = train_reader.next_minibatch(minibatch_size)
            train_features = mb_train[train_reader.streams.features]
            train_labels = mb_train[train_reader.streams.labels]
            trainer.train_minibatch({
                criterion.arguments[0]: train_features,
                criterion.arguments[1]: train_labels,
            })

            progress_printer.update_with_trainer(trainer, with_metric=True)

            # Every eval_freq minibatches show how the decoder output model
            # (i.e. without the ground truth) handles a validation sequence.
            if minibatches_done % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)
                valid_features = mb_valid[valid_reader.streams.features]

                decoded = model_greedy(valid_features)
                print(format_sequences(sparse_to_dense(valid_features), i2w))
                print("->")
                print(format_sequences(decoded, i2w))

                if use_attention:
                    debug_attention(model_greedy, valid_features)

            total_samples += train_labels.num_samples
            minibatches_done += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
def train_model(cntkModel, params, input_map):
    """Train ``cntkModel`` with momentum SGD and plot per-epoch error curves.

    After every epoch the training error (average of the per-minibatch
    evaluation averages, weighted by requested minibatch size) and the
    validation error returned by ``cntkComputeTestError`` are appended to the
    curves and the plot is redrawn.

    Args:
        cntkModel: Model whose parameters are trained.
        params: Mapping holding ``mb_size``, ``num_epochs``,
            ``epoch_size_train``, ``epoch_size_test``, ``train_mbs``,
            ``valid_mbs``, ``learn_rate``, ``beta_momentum_gd``,
            ``l2_reg_weight``, ``ce`` and ``pe``.
        input_map: Stream-to-input mapping passed to the minibatch sources.

    Returns:
        The same ``cntkModel`` object that was passed in.
    """
    log = logging.getLogger("neuralnets1.utils.train_model")

    mb_size = params['mb_size']
    num_epochs = params['num_epochs']
    epoch_size_train = params['epoch_size_train']
    epoch_size_test = params['epoch_size_test']
    train_source = params['train_mbs']
    valid_source = params['valid_mbs']

    # Instantiate the trainer object
    lr_per_minibatch = learning_parameter_schedule(
        params['learn_rate'], minibatch_size=mb_size, epoch_size=epoch_size_train)
    mm_schedule = momentum_schedule(params['beta_momentum_gd'])
    learner = momentum_sgd(cntkModel.parameters, lr_per_minibatch, mm_schedule,
                           l2_regularization_weight=params['l2_reg_weight'])
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=num_epochs)]
    criterion = (params['ce'], params['pe'])
    trainer = Trainer(cntkModel, criterion, learner, progress_writers)

    # Run training epochs
    log.info(
        'Training transfer learning model for %s epochs (epoch_size_train = %s ) .'
        % (num_epochs, epoch_size_train))
    val_errors = []
    train_errors = []
    log_number_of_parameters(cntkModel)

    report_every = 100 * mb_size
    for _ in range(num_epochs):
        weighted_error = 0
        processed = 0
        while processed < epoch_size_train:
            # The last minibatch of an epoch is shortened to fit the epoch size.
            batch_size = min(mb_size, epoch_size_train - processed)
            data = train_source.next_minibatch(batch_size, input_map=input_map)
            trainer.train_minibatch(data)
            processed += batch_size
            weighted_error += trainer.previous_minibatch_evaluation_average * batch_size

            if processed % report_every == 0:
                log.info("Training: processed %s samples" % processed)

        # Compute accuracy on training and validation sets
        train_errors.append(weighted_error / float(processed))
        trainer.summarize_training_progress()
        val_errors.append(cntkComputeTestError(
            trainer, valid_source, mb_size, epoch_size_test, input_map))
        trainer.summarize_test_progress()

        # Plot training progress
        plt.plot(train_errors, 'b-', val_errors, 'g-')
        plt.xlabel('Epoch number')
        plt.ylabel('Error')
        plt.title('Training error (blue), validation error (green)')
        plt.draw()

    return cntkModel
def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train, cfg,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    """Train the network behind ``loss`` without per-minibatch progress output.

    Bias parameters (name equal to ``'b'`` or containing ``'.b'``) and the
    remaining parameters are trained by two separate momentum-SGD learners.
    The bias learner uses ``lr_per_sample`` multiplied element-wise by
    ``cfg["CNTK"].BIAS_LR_MULT``. Only ``epoch_summary`` is called on the
    progress printer, once per epoch.

    Args:
        image_input: Network input mapped to the reader's image stream.
        roi_input: Network input mapped to the reader's ROI stream.
        dims_input: Network input mapped to the reader's dims stream when no
            buffered proposals are given.
        loss: Training criterion. A ``cntk.Variable`` is wrapped with
            ``combine`` before use.
        pred_error: Metric handed to the trainer together with ``loss``.
        lr_per_sample: Iterable of per-sample learning rates.
        mm_schedule: Momentum schedule shared by both learners.
        l2_reg_weight: L2 regularization weight shared by both learners.
        epochs_to_train: Number of epochs; one epoch processes
            ``cfg["DATA"].NUM_TRAIN_IMAGES`` samples.
        cfg: Configuration object read through item and attribute access.
        rpn_rois_input: Network input mapped to the reader's proposals stream
            when buffered proposals are given.
        buffered_rpn_proposals: Optional proposals passed to
            ``ProposalProvider.fromlist``.
    """
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    parameters = loss.parameters
    biases = [prm for prm in parameters if '.b' in prm.name or 'b' == prm.name]
    others = [prm for prm in parameters if prm not in biases]

    cntk_section = cfg["CNTK"]
    bias_lr_mult = cntk_section.BIAS_LR_MULT

    if cntk_section.DEBUG_OUTPUT:
        for title, group in (("biases", biases), ("others", others)):
            print(title)
            for prm in group:
                print(prm)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Two learners with identical options; only parameters and rates differ.
    shared_options = dict(l2_regularization_weight=l2_reg_weight,
                          unit_gain=False, use_mean_gradient=True)
    main_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
    learner = momentum_sgd(others, main_schedule, mm_schedule, **shared_options)
    scaled_bias_rates = [rate * bias_lr_mult for rate in lr_per_sample]
    bias_schedule = learning_parameter_schedule_per_sample(scaled_bias_rates)
    bias_learner = momentum_sgd(biases, bias_schedule, mm_schedule, **shared_options)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    proposals_buffered = buffered_rpn_proposals is not None
    if proposals_buffered:
        proposal_provider = ProposalProvider.fromlist(
            buffered_rpn_proposals, requires_scaling=False)
    else:
        proposal_provider = None

    data_section = cfg["DATA"]
    od_minibatch_source = ObjectDetectionMinibatchSource(
        data_section.TRAIN_MAP_FILE,
        data_section.TRAIN_ROI_FILE,
        num_classes=data_section.NUM_CLASSES,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=data_section.NUM_TRAIN_IMAGES,
        proposal_provider=proposal_provider)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
    }
    if proposals_buffered:
        input_map[od_minibatch_source.proposals_si] = rpn_rois_input
    else:
        input_map[od_minibatch_source.dims_si] = dims_input

    progress_printer = ProgressPrinter(
        tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
    images_per_epoch = data_section.NUM_TRAIN_IMAGES

    for _ in range(epochs_to_train):
        sample_count = 0
        while sample_count < images_per_epoch:
            batch_size = min(cfg.MB_SIZE, images_per_epoch - sample_count)
            data = od_minibatch_source.next_minibatch(batch_size, input_map=input_map)
            trainer.train_minibatch(data)
            # Per-minibatch progress output is switched off in this variant;
            # only the sample counter is advanced.
            sample_count += trainer.previous_minibatch_sample_count

        progress_printer.epoch_summary(with_metric=True)
def train(reader_train, reader_test, samples_per_epoch, max_amount_of_epochs,
          samples_per_minibatch, dimensions, classes, learning_rate,
          output_directory, with_tf):
    """Build a classifier, train it with a ``training_session`` and return it.

    Args:
        reader_train: Training minibatch source exposing ``streams.features``
            and ``streams.labels``.
        reader_test: Test minibatch source exposing the same streams.
        samples_per_epoch: Used as both the progress and the checkpoint
            frequency of the training session.
        max_amount_of_epochs: Passed to the progress printer as ``num_epochs``.
        samples_per_minibatch: Minibatch size for training and testing.
        dimensions: Mapping with ``depth``, ``height`` and ``width`` entries
            describing the input shape.
        classes: Sized collection of classes; its length sets the label shape.
        learning_rate: Per-minibatch learning rate for momentum SGD.
        output_directory: Directory that receives the checkpoint file.
        with_tf: When truthy, the model is built by ``create_tf_model`` from
            the ResNet_18 base model; otherwise by ``create_model``.

    Returns:
        Dict with the ``features`` and ``label`` input variables and
        ``model``, the softmax of the trained network.
    """
    num_classes = len(classes)
    input_shape = (dimensions['depth'], dimensions['height'], dimensions['width'])
    features = input_variable(shape=input_shape)
    label = input_variable(shape=num_classes)

    # speeds up training
    normalized_features = element_times(1.0 / 256.0, features)

    if not with_tf:
        model = create_model(feature_dimensions=normalized_features, classes=classes)
    else:
        base_model = {
            'model_file': os.path.join("..", "..", "Pretrained Models/ResNet_18.model"),
            'feature_node_name': 'features',
            'last_hidden_node_name': 'z.x',
            'image_dims': (3, 224, 224),
        }
        model = create_tf_model(base_model,
                                num_classes=num_classes,
                                input_features=normalized_features,
                                freeze=True)

    loss = cross_entropy_with_softmax(model, label)
    metric = classification_error(model, label)
    learner = momentum_sgd(
        parameters=model.parameters,
        lr=learning_rate_schedule(learning_rate, UnitType.minibatch),
        momentum=0.9,
        l2_regularization_weight=0.0005)

    reporter = ProgressPrinter(tag='training', num_epochs=max_amount_of_epochs)
    trainer = Trainer(model=model,
                      criterion=(loss, metric),
                      parameter_learners=[learner],
                      progress_writers=[reporter])

    log_number_of_parameters(model)

    train_streams = {
        features: reader_train.streams.features,
        label: reader_train.streams.labels,
    }
    test_streams = {
        features: reader_test.streams.features,
        label: reader_test.streams.labels,
    }

    checkpoint_file = os.path.join(output_directory, "ConvNet_Lego_VisiOn")
    session = training_session(
        trainer=trainer,
        mb_source=reader_train,
        model_inputs_to_streams=train_streams,
        mb_size=samples_per_minibatch,
        progress_frequency=samples_per_epoch,
        checkpoint_config=CheckpointConfig(frequency=samples_per_epoch,
                                           filename=checkpoint_file,
                                           restore=False),
        test_config=TestConfig(reader_test,
                               minibatch_size=samples_per_minibatch,
                               model_inputs_to_streams=test_streams))
    session.train()

    return {
        'features': features,
        'label': label,
        'model': softmax(model),
    }
learner = momentum_sgd(z.parameters, lr = lr_per_minibatch, momentum = momentum_time_constant, l2_regularization_weight=l2_reg_weight) ######### RESTORE TRAINER IF NEEDED trainer = Trainer(z, (ce, pe), [learner], [progress_printer]) # trainer.restore_from_checkpoint(model_temp_file) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) ; print() #progress_printer = ProgressPrinter(tag='Training') # perform model training batch_index = 0 plot_data = {'batchindex':[], 'loss':[], 'error':[]} for epoch in range(max_epochs): # loop over epochs sample_count = 0 ev_avg=0 i_count=0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += data[label_var].num_samples # count samples processed so far
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes,
                train_map_file, num_epochs, max_images=-1, freeze=False):
    """Train a transfer-learning model and save loss/error plots to disk.

    One epoch covers one sample per line of ``train_map_file``, optionally
    capped by ``max_images``. After training, moving averages (window of 32
    minibatches) of the training loss and error are written to the figure
    paths ``output_figure_loss`` and ``output_figure_error``.

    Args:
        base_model_file: Base model passed to ``create_model``.
        feature_node_name: Feature node name passed to ``create_model``.
        last_hidden_node_name: Last hidden node name passed to ``create_model``.
        image_width: Input image width.
        image_height: Input image height.
        num_channels: Number of input image channels.
        num_classes: Number of output classes.
        train_map_file: Path of the training map file, one sample per line.
        num_epochs: Number of epochs to train.
        max_images: When positive, upper bound on the epoch size.
        freeze: Passed through to ``create_model``.

    Returns:
        The trained transfer-learning model.
    """
    # Count the samples with a context manager so the map file is closed
    # right away instead of lingering until garbage collection.
    with open(train_map_file) as map_file:
        epoch_size = sum(1 for _ in map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(
        train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name],
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name,
                            last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_parameter_schedule(lr_per_mb)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(
        tag='Training', log_to_file=log_file_name, num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(
        num_epochs, epoch_size))
    batch_index = 0
    plot_data = {'batchindex': list(), 'loss': list(), 'error': list()}
    log_number_of_parameters(tl_model)

    for epoch in range(num_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(
                min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count

            # For visualization...
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(trainer.previous_minibatch_loss_average)
            plot_data['error'].append(trainer.previous_minibatch_evaluation_average)
            batch_index += 1

        trainer.summarize_training_progress()

    # Visualize training result: moving average computed from cumulative sums.
    window_width = 32
    loss_cumsum = np.cumsum(np.insert(plot_data['loss'], 0, 0))
    error_cumsum = np.cumsum(np.insert(plot_data['error'], 0, 0))

    plot_data['batchindex'] = np.insert(plot_data['batchindex'], 0, 0)[window_width:]
    plot_data['avg_loss'] = (
        loss_cumsum[window_width:] - loss_cumsum[:-window_width]) / window_width
    plot_data['avg_error'] = (
        error_cumsum[window_width:] - error_cumsum[:-window_width]) / window_width

    plt.figure(1)
    plt.plot(plot_data["batchindex"], plot_data["avg_loss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss ')
    plt.savefig(output_figure_loss, bbox_inches='tight')

    plt.figure(2)
    plt.plot(plot_data["batchindex"], plot_data["avg_error"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error ')
    plt.savefig(output_figure_error, bbox_inches='tight')

    return tl_model
def init_train_fast_rcnn(image_height, image_width, num_classes, num_rois, mb_size,
                         max_epochs, cntk_lr_per_image, l2_reg_weight,
                         momentum_time_constant, base_path, boSkipTraining=False,
                         debug_output=False, tensorboardLogDir=None):
    """Build the Fast R-CNN predictor and, unless skipped, train it.

    Args:
        image_height: Input image height.
        image_width: Input image width.
        num_classes: Number of classes per ROI.
        num_rois: Number of ROIs per image.
        mb_size: Minibatch size.
        max_epochs: Number of epochs to train.
        cntk_lr_per_image: Iterable of per-image learning rates; each value is
            divided by ``num_rois`` to obtain the per-sample rate.
        l2_reg_weight: L2 regularization weight for the learner.
        momentum_time_constant: Time constant for the momentum schedule.
        base_path: Base path handed to ``frcn_predictor`` and
            ``create_mb_source``.
        boSkipTraining: When true, return the penultimate layer of the
            untrained predictor right away.
        debug_output: When true, plot the graph to ``graph_frcn.png`` and save
            the model after every epoch.
        tensorboardLogDir: Optional log directory; when given, training
            progress and per-parameter statistics are written there.

    Returns:
        The penultimate layer when ``boSkipTraining`` is true, otherwise the
        trained network output.
    """
    # make sure we use GPU for training
    if use_default_device().type() == 0:
        print("WARNING: using CPU for training.")
    else:
        print("Using GPU for training.")

    # Instantiate the Fast R-CNN prediction model
    image_input = input_variable((3, image_height, image_width))
    roi_input = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))
    frcn_output, frcn_penultimateLayer = frcn_predictor(
        image_input, roi_input, num_classes, base_path)

    if boSkipTraining:
        print("Using pre-trained DNN without refinement")
        return frcn_penultimateLayer

    # Create the minibatch source and map reader streams to network inputs
    minibatch_source, epoch_size = create_mb_source(
        "train", image_height, image_width, num_classes, num_rois, base_path,
        randomize=True)
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels,
    }

    # set loss / error functions
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, "graph_frcn.png")

    # Set the progress printer(s). Identity comparison is the correct test
    # for "no log directory given"; evaluate it once for both uses below.
    use_tensorboard = tensorboardLogDir is not None
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    if use_tensorboard:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboardLogDir, model=frcn_output)
        progress_writers.append(tensorboard_writer)

    # Set learning parameters and instantiate the trainer object
    lr_per_sample = [f / float(num_rois) for f in cntk_lr_per_image]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):
        sample_count = 0

        # loop over minibatches in the epoch
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(
                min(mb_size, epoch_size - sample_count), input_map=input_map)
            if sample_count % 100 == 1:
                print("Training in progress: epoch {} of {}, sample count {} of {}"
                      .format(epoch, max_epochs, sample_count, epoch_size))
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
        trainer.summarize_training_progress()

        # Log statistics of each parameter tensor, so that we can confirm
        # that the parameters change indeed.
        if use_tensorboard:
            for parameter in frcn_output.parameters:
                value = parameter.value
                tensorboard_writer.write_value(
                    parameter.uid + "/mean", np.mean(value), epoch)
                tensorboard_writer.write_value(
                    parameter.uid + "/std", np.std(value), epoch)
                tensorboard_writer.write_value(
                    parameter.uid + "/absSum", np.sum(np.abs(value)), epoch)

        if debug_output:
            frcn_output.save_model("frcn_py_%s.model" % (epoch + 1))

    return frcn_output
def train_fast_rcnn(cfg):
    """Return a Fast R-CNN evaluation model, training one only if needed.

    When a file exists at ``cfg['MODEL_PATH']`` and ``cfg["CNTK"].MAKE_MODE``
    is set, that model is loaded and returned. Otherwise a model is trained
    for ``cfg["CNTK"].MAX_EPOCHS`` epochs, converted with
    ``create_fast_rcnn_eval_model``, saved to ``cfg['MODEL_PATH']`` and
    returned.

    Args:
        cfg: Configuration object read through item and attribute access.

    Returns:
        The loaded or freshly trained evaluation model.
    """
    # Train only if no model exists yet
    model_path = cfg['MODEL_PATH']
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)

    # Input variables denoting features and labeled ground truth rois
    # (as 5-tuples per roi)
    batch_axes = [Axis.default_batch_axis()]
    num_proposals = cfg.NUM_ROI_PROPOSALS
    num_classes = cfg["DATA"].NUM_CLASSES
    image_input = input_variable(
        shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
        dynamic_axes=batch_axes,
        name=cfg["MODEL"].FEATURE_NODE_NAME)
    roi_proposals = input_variable(
        (num_proposals, 4), dynamic_axes=batch_axes, name="roi_proposals")
    label_targets = input_variable(
        (num_proposals, num_classes), dynamic_axes=batch_axes)
    bbox_targets = input_variable(
        (num_proposals, 4 * num_classes), dynamic_axes=batch_axes)
    bbox_inside_weights = input_variable(
        (num_proposals, 4 * num_classes), dynamic_axes=batch_axes)

    # Instantiate the Fast R-CNN prediction model and loss function
    loss, pred_error = create_fast_rcnn_model(
        image_input, roi_proposals, label_targets, bbox_targets,
        bbox_inside_weights, cfg)
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)
        plot(loss, os.path.join(
            cfg.OUTPUT_PATH, "graph_frcn_train." + cfg["CNTK"].GRAPH_TYPE))

    # Set learning parameters
    lr_factor = cfg["CNTK"].LR_FACTOR
    lr_per_sample_scaled = [x * lr_factor for x in cfg["CNTK"].LR_PER_SAMPLE]
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    epochs_to_train = cfg["CNTK"].MAX_EPOCHS

    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("lr_per_sample: {}".format(lr_per_sample_scaled))

    # --- train ---
    # Instantiate the learners and the trainer object. Biases (name 'b' or
    # containing '.b') get their own learner with a multiplied learning rate.
    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if p not in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    lr_schedule = learning_rate_schedule(lr_per_sample_scaled, unit=UnitType.sample)
    learner = momentum_sgd(others, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=True)

    # Derive the bias rates from the LR_FACTOR-scaled list so the factor
    # applies to both learners and the bias multiplier stays a pure ratio.
    bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample_scaled]
    bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule,
                                l2_regularization_weight=l2_reg_weight,
                                unit_gain=False, use_mean_gradient=True)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        proposal_provider = ProposalProvider.fromfile(
            cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS)
    else:
        proposal_provider = ProposalProvider.fromconfig(cfg)

    od_minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE,
        cfg["DATA"].TRAIN_ROI_FILE,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["DATA"].NUM_TRAIN_IMAGES,
        num_classes=cfg["DATA"].NUM_CLASSES,
        proposal_provider=proposal_provider,
        provide_targets=True,
        proposal_iou_threshold=cfg.BBOX_THRESH,
        normalize_means=None if not cfg.BBOX_NORMALIZE_TARGETS else cfg.BBOX_NORMALIZE_MEANS,
        normalize_stds=None if not cfg.BBOX_NORMALIZE_TARGETS else cfg.BBOX_NORMALIZE_STDS)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.proposals_si: roi_proposals,
        od_minibatch_source.label_targets_si: label_targets,
        od_minibatch_source.bbox_targets_si: bbox_targets,
        od_minibatch_source.bbiw_si: bbox_inside_weights,
    }

    progress_printer = ProgressPrinter(
        tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
    for epoch in range(epochs_to_train):
        sample_count = 0
        while sample_count < cfg["DATA"].NUM_TRAIN_IMAGES:
            data = od_minibatch_source.next_minibatch(
                min(cfg.MB_SIZE, cfg["DATA"].NUM_TRAIN_IMAGES - sample_count),
                input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(trainer, with_metric=True)
            if sample_count % 100 == 0:
                print("Processed {} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)

    eval_model = create_fast_rcnn_eval_model(loss, image_input, roi_proposals, cfg)
    eval_model.save(cfg['MODEL_PATH'])
    return eval_model
def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    """Train the network behind ``loss`` using module-level configuration.

    Unlike the variants that take a ``cfg`` argument, this one reads ``cfg``,
    ``globalvars``, ``image_width``, ``image_height``, ``img_pad_value``,
    ``epoch_size`` and ``mb_size`` from the enclosing module.

    Bias parameters (name equal to ``'b'`` or containing ``'.b'``) and the
    remaining parameters are trained by two separate momentum-SGD learners.
    The bias learner uses ``lr_per_sample`` multiplied element-wise by
    ``cfg["CNTK"].BIAS_LR_MULT``.

    Args:
        image_input: Network input mapped to the reader's image stream.
        roi_input: Network input mapped to the reader's ROI stream.
        dims_input: Network input mapped to the reader's dims stream.
        loss: Training criterion. A ``cntk.Variable`` is wrapped with
            ``combine`` before use.
        pred_error: Metric handed to the trainer together with ``loss``.
        lr_per_sample: Iterable of per-sample learning rates.
        mm_schedule: Momentum schedule shared by both learners.
        l2_reg_weight: L2 regularization weight shared by both learners.
        epochs_to_train: Number of epochs of ``epoch_size`` samples each.
        rpn_rois_input: Network input that receives the buffered proposals
            when ``buffered_rpn_proposals`` is given.
        buffered_rpn_proposals: Optional proposals handed to the minibatch
            source.
    """
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    all_parameters = loss.parameters
    biases = [prm for prm in all_parameters if '.b' in prm.name or 'b' == prm.name]
    others = [prm for prm in all_parameters if prm not in biases]

    cntk_section = cfg["CNTK"]
    bias_lr_mult = cntk_section.BIAS_LR_MULT

    if cntk_section.DEBUG_OUTPUT:
        print("biases")
        for prm in biases:
            print(prm)
        print("others")
        for prm in others:
            print(prm)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    main_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    learner = momentum_sgd(
        others, main_schedule, mm_schedule,
        l2_regularization_weight=l2_reg_weight,
        unit_gain=False,
        use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)

    bias_rates = [rate * bias_lr_mult for rate in lr_per_sample]
    bias_schedule = learning_rate_schedule(bias_rates, unit=UnitType.sample)
    bias_learner = momentum_sgd(
        biases, bias_schedule, mm_schedule,
        l2_regularization_weight=l2_reg_weight,
        unit_gain=False,
        use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'],
        globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width,
        pad_height=image_height,
        pad_value=img_pad_value,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["CNTK"].NUM_TRAIN_IMAGES,
        buffered_rpn_proposals=buffered_rpn_proposals)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input,
    }

    use_buffered_proposals = buffered_rpn_proposals is not None
    progress_printer = ProgressPrinter(
        tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)

    for _ in range(epochs_to_train):
        sample_count = 0
        while sample_count < epoch_size:
            batch_size = min(mb_size, epoch_size - sample_count)
            data, proposals = od_minibatch_source.next_minibatch_with_proposals(
                batch_size, input_map=input_map)

            if use_buffered_proposals:
                proposal_batch = Value(batch=np.asarray(proposals, dtype=np.float32))
                data[rpn_rois_input] = MinibatchData(proposal_batch, 1, 1, False)
                # remove dims input if no rpn is required to avoid warnings;
                # the entry is located by the '[6]' marker in the key's string form
                dims_keys = [key for key in data if '[6]' in str(key)]
                del data[dims_keys[0]]

            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(trainer, with_metric=True)
            if sample_count % 100 == 0:
                print("Processed {} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)
def train_lm():
    """Train the language model and save a checkpoint after every epoch.

    All hyper-parameters and paths (``token_to_id_path``, ``hidden_dim``,
    ``learning_rate``, ``num_epochs``, ``train_file_path`` ...) are read from
    module-level names. The model is trained on the criterion returned by
    ``create_model``; progress is reported with the built-in full softmax
    cross entropy via ``average_cross_entropy`` / ``print_progress``.
    """
    data = DataReader(token_to_id_path, segment_sepparator)

    # Source and target input nodes.
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # create_model returns three nodes:
    #   z             - input to the softmax (latent representation of the next token)
    #   cross_entropy - training criterion
    #   error         - binary indicator of whether the predicted token is correct
    z, cross_entropy, error = create_model(
        input_sequence, label_sequence, data.vocab_dim, hidden_dim)

    # Full softmax cross entropy, used for measurement only.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    log_number_of_parameters(z)
    print()

    # Learner and trainer.
    learner = momentum_sgd(
        z.parameters,
        learning_rate_schedule(learning_rate, UnitType.sample),
        momentum_as_time_constant_schedule(momentum_as_time_constant),
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(z, (cross_entropy, error), learner)

    tokens_total = 0
    tokens_since_report = 0

    for epoch_count in range(num_epochs):
        batches = data.minibatch_generator(
            train_file_path, sequence_length, sequences_per_batch)
        for features, labels, token_count in batches:
            t_start = timeit.default_timer()
            trainer.train_minibatch({input_sequence: features, label_sequence: labels})
            t_end = timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Report on the very first minibatch and then every
            # num_samples_between_progress_report samples.
            report_due = (tokens_since_report >= num_samples_between_progress_report
                          or tokens_total == 0)
            if report_due:
                av_ce = average_cross_entropy(full_ce, input_sequence, label_sequence, data)
                print_progress(samples_per_second, av_ce, tokens_total, t_start)
                tokens_since_report = 0

            tokens_total += token_count
            tokens_since_report += token_count

        # Checkpoint at the end of each epoch.
        model_filename = "models/lm_epoch%d.dnn" % epoch_count
        z.save_model(model_filename)
        print("Saved model to '%s'" % model_filename)
def train_lm(training_file, epochs, max_num_minibatches):
    """Train the character-level model on ``training_file``.

    Args:
        training_file: path handed to ``load_data_and_vocab``.
        epochs: number of passes to run.
        max_num_minibatches: upper bound on the total number of minibatches;
            each epoch gets at most ``max_num_minibatches // epochs`` of them.

    After every epoch the model is saved to
    ``models/shakespeare_epoch<N>.dnn`` (N starting at 1). Every 1000th
    minibatch, counted across epochs, the output of ``sample`` is printed.
    ``minibatch_size`` is read from module scope.
    """
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(
        training_file)

    # Inputs, model and the model applied to the input sequence.
    input_sequence, label_sequence = create_inputs(vocab_dim)
    model = create_model(vocab_dim)
    z = model(input_sequence)

    # Loss and metric.
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Learner, progress output and trainer.
    learner = momentum_sgd(
        z.parameters,
        learning_parameter_schedule_per_sample(0.001),
        momentum_schedule_per_sample(0.9990913221888589),
        gradient_clipping_threshold_per_sample=5.0,
        gradient_clipping_with_truncation=True)
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size,
                                max_num_minibatches // epochs)

    log_number_of_parameters(z)
    print("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))
    print()

    for e in range(epochs):
        for b in range(minibatches_per_epoch):
            features, labels = get_data(b, minibatch_size, data, char_to_ix, vocab_dim)

            # The second tuple element flags the start of a new sequence: it is
            # [True] only for the first minibatch of each epoch.
            starts_new_sequence = [b == 0]
            trainer.train_minibatch(
                ({input_sequence: features, label_sequence: labels}, starts_new_sequence))

            global_minibatch = e * minibatches_per_epoch + b
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (e + 1)
        z.save(model_filename)
        print("Saved model to '%s'" % model_filename)
def train_fast_rcnn(cfg):
    """Train a Fast R-CNN model, or load the stored one when make-mode allows it.

    If ``cfg['MODEL_PATH']`` exists and ``cfg["CNTK"].MAKE_MODE`` is set, the
    stored model is loaded and returned without training. Otherwise the
    training network is built, trained for ``cfg["CNTK"].MAX_EPOCHS`` epochs,
    converted with ``create_fast_rcnn_eval_model`` and saved to
    ``cfg['MODEL_PATH']``.

    Args:
        cfg: configuration object supporting both item access
            (``cfg["CNTK"]``, ``cfg["DATA"]``, ...) and attribute access
            (``cfg.NUM_ROI_PROPOSALS``, ...).

    Returns:
        The loaded model, or the freshly trained evaluation model.
    """
    # Train only if no model exists yet
    model_path = cfg['MODEL_PATH']
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)

    # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
    image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=cfg["MODEL"].FEATURE_NODE_NAME)
    roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4),
                                   dynamic_axes=[Axis.default_batch_axis()],
                                   name="roi_proposals")
    label_targets = input_variable((cfg.NUM_ROI_PROPOSALS, cfg["DATA"].NUM_CLASSES),
                                   dynamic_axes=[Axis.default_batch_axis()])
    bbox_targets = input_variable((cfg.NUM_ROI_PROPOSALS, 4 * cfg["DATA"].NUM_CLASSES),
                                  dynamic_axes=[Axis.default_batch_axis()])
    bbox_inside_weights = input_variable((cfg.NUM_ROI_PROPOSALS, 4 * cfg["DATA"].NUM_CLASSES),
                                         dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the Fast R-CNN prediction model and loss function
    loss, pred_error = create_fast_rcnn_model(image_input, roi_proposals, label_targets,
                                              bbox_targets, bbox_inside_weights, cfg)
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)
        plot(loss, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train." + cfg["CNTK"].GRAPH_TYPE))

    # Set learning parameters
    lr_factor = cfg["CNTK"].LR_FACTOR
    lr_per_sample_scaled = [x * lr_factor for x in cfg["CNTK"].LR_PER_SAMPLE]
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    epochs_to_train = cfg["CNTK"].MAX_EPOCHS

    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("lr_per_sample: {}".format(lr_per_sample_scaled))

    # --- train ---
    # Parameters named 'b' or containing '.b' are biases and get their own learner.
    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if p not in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample_scaled)
    learner = momentum_sgd(others, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=True)

    # The bias rates are a multiple of the *scaled* rates so that LR_FACTOR
    # applies to every parameter, not only to the non-bias ones.
    bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample_scaled]
    bias_lr_schedule = learning_parameter_schedule_per_sample(bias_lr_per_sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule,
                                l2_regularization_weight=l2_reg_weight,
                                unit_gain=False, use_mean_gradient=True)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        proposal_provider = ProposalProvider.fromfile(
            cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS)
    else:
        proposal_provider = ProposalProvider.fromconfig(cfg)

    od_minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE,
        cfg["DATA"].TRAIN_ROI_FILE,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["DATA"].NUM_TRAIN_IMAGES,
        num_classes=cfg["DATA"].NUM_CLASSES,
        proposal_provider=proposal_provider,
        provide_targets=True,
        proposal_iou_threshold=cfg.BBOX_THRESH,
        normalize_means=None if not cfg.BBOX_NORMALIZE_TARGETS else cfg.BBOX_NORMALIZE_MEANS,
        normalize_stds=None if not cfg.BBOX_NORMALIZE_TARGETS else cfg.BBOX_NORMALIZE_STDS)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.proposals_si: roi_proposals,
        od_minibatch_source.label_targets_si: label_targets,
        od_minibatch_source.bbox_targets_si: bbox_targets,
        od_minibatch_source.bbiw_si: bbox_inside_weights,
    }

    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train,
                                       gen_heartbeat=True)

    for epoch in range(epochs_to_train):  # loop over epochs
        sample_count = 0
        while sample_count < cfg["DATA"].NUM_TRAIN_IMAGES:  # loop over minibatches in the epoch
            # Never request more samples than are left in this epoch.
            data = od_minibatch_source.next_minibatch(
                min(cfg.MB_SIZE, cfg["DATA"].NUM_TRAIN_IMAGES - sample_count),
                input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

        progress_printer.epoch_summary(with_metric=True)

    eval_model = create_fast_rcnn_eval_model(loss, image_input, roi_proposals, cfg)
    eval_model.save(cfg['MODEL_PATH'])
    return eval_model
def train_fast_rcnn(debug_output=False, model_path=model_file):
    """Train the Fast R-CNN network with a data-parallel distributed learner.

    Args:
        debug_output: when true, the network graph is plotted and the model is
            saved after every epoch under ``<abs_path>/Output``.
        model_path: forwarded to ``frcn_predictor``.

    Returns:
        The trained ``frcn_output`` function.

    Image/ROI dimensions, ``epoch_size``, ``mb_size``, ``max_epochs``,
    ``momentum_per_sample``, ``num_quantization_bits``, ``warm_up`` and
    ``distributed_flg`` are read from module scope.
    """
    output_dir = os.path.join(abs_path, "Output") if debug_output else None
    if debug_output:
        print("Storing graphs and intermediate models to %s." % output_dir)

    # Reader for the training split.
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Network inputs: features, rois and labels.
    image_input = C.input_variable((num_channels, image_height, image_width))
    roi_input = C.input_variable((num_rois, 4))
    label_input = C.input_variable((num_rois, num_classes))

    # Network inputs -> reader streams.
    reader_streams = minibatch_source.streams
    input_map = {
        image_input: reader_streams.features,
        roi_input: reader_streams.rois,
        label_input: reader_streams.roiLabels,
    }

    # Prediction model plus loss and metric.
    frcn_output = frcn_predictor(image_input, roi_input, num_classes, model_path)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(output_dir, "graph_frcn.png"))

    # Learning parameters.
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
    mm_schedule = momentum_schedule_per_sample(momentum_per_sample)

    # Plain learner first, then wrapped for distributed training.
    local_learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)
    learner = distributed.data_parallel_distributed_learner(
        learner=local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=warm_up)

    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs,
                                       rank=distributed.Communicator.rank())
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)

    for epoch in range(max_epochs):
        processed = 0
        while processed < epoch_size:
            # Request one local minibatch per worker, capped at what is left
            # of the epoch; each worker reads its own partition.
            worker_count = C.Communicator.num_workers()
            batch_size = min(mb_size * worker_count, epoch_size - processed)
            data = minibatch_source.next_minibatch(
                batch_size,
                input_map=input_map,
                num_data_partitions=worker_count,
                partition_index=C.Communicator.rank())
            trainer.train_minibatch(data)
            processed += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        if debug_output:
            checkpoint_name = "frcn_py_%s.model" % (epoch + 1)
            frcn_output.save(os.path.join(output_dir, checkpoint_name))

    if distributed_flg:
        distributed.Communicator.finalize()

    return frcn_output