def create_trainer(network, epoch_size, num_epochs, minibatch_size,
                   num_quantization_bits, progress_printer):
    # CNTK weights the new gradient by (1 - momentum) for unit gain,
    # thus we divide Caffe's learning rate by (1 - momentum)
    initial_learning_rate = 0.45  # equal to 0.045 in caffe
    initial_learning_rate *= minibatch_size / 32
    learn_rate_adjust_interval = 2
    learn_rate_decrease_factor = 0.94

    # Set learning parameters
    lr_per_mb = []
    learning_rate = initial_learning_rate
    for i in range(0, num_epochs, learn_rate_adjust_interval):
        lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
        learning_rate *= learn_rate_decrease_factor

    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch,
                                         epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9)
    l2_reg_weight = 0.0001  # CNTK L2 regularization is per sample, thus same as Caffe

    # Create learner
    local_learner = momentum_sgd(network['output'].parameters, lr_schedule,
                                 mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)
    parameter_learner = data_parallel_distributed_learner(
        local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return Trainer(network['output'], (network['ce'], network['pe']),
                   parameter_learner, progress_printer)

def create_trainer(network, epoch_size, num_quantization_bits,
                   progress_printer):
    # Set learning parameters
    lr_per_mb = [0.01] * 20 + [0.001] * 20 + [0.0001] * 20 + [0.00001] * 10 + [
        0.000001
    ]
    lr_schedule = C.learning_parameter_schedule(lr_per_mb,
                                                epoch_size=epoch_size)
    mm_schedule = C.learners.momentum_schedule(0.9)
    l2_reg_weight = 0.0005  # CNTK L2 regularization is per sample, thus same as Caffe

    # Create learner
    # Since we reuse parameter settings (learning rate, momentum) from Caffe,
    # we set unit_gain to False to ensure consistency
    local_learner = C.learners.momentum_sgd(
        network['output'].parameters,
        lr_schedule,
        mm_schedule,
        unit_gain=False,
        l2_regularization_weight=l2_reg_weight)
    parameter_learner = data_parallel_distributed_learner(
        local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return C.Trainer(network['output'], (network['ce'], network['pe']),
                     parameter_learner, progress_printer)

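# The create_trainer variants here reconcile CNTK's unit-gain momentum with
# Caffe's settings in two ways: the first rescales Caffe's learning rate by
# 1/(1 - momentum), while the one directly above keeps Caffe's rate and sets
# unit_gain=False. A minimal illustrative sketch of the two update rules
# (momentum_step is a hypothetical helper, not part of the snippets here):
def momentum_step(grad, velocity, lr, momentum, unit_gain):
    """One scalar momentum-SGD step; returns (new velocity, parameter delta)."""
    if unit_gain:
        velocity = momentum * velocity + (1.0 - momentum) * grad  # CNTK default
    else:
        velocity = momentum * velocity + grad                     # Caffe-style
    return velocity, lr * velocity

# With momentum = 0.9, the unit-gain rule scales each gradient by 0.1, so
# lr = 0.45 with unit gain matches lr = 0.045 without it in steady state,
# exactly the "equal to 0.045 in caffe" comment above.
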
def create_trainer(network, epoch_size, num_epochs, minibatch_size,
                   num_quantization_bits, progress_printer):
    # CNTK weights the new gradient by (1 - momentum) for unit gain,
    # thus we divide Caffe's learning rate by (1 - momentum)
    initial_learning_rate = 2.0  # equal to 0.2 in caffe
    initial_learning_rate *= minibatch_size / 128
    learn_rate_adjust_interval = 2
    learn_rate_decrease_factor = 0.94

    # Set learning parameters
    lr_per_mb = []
    learning_rate = initial_learning_rate
    for i in range(0, num_epochs, learn_rate_adjust_interval):
        lr_per_mb.extend([learning_rate] * learn_rate_adjust_interval)
        learning_rate *= learn_rate_decrease_factor

    lr_schedule = learning_parameter_schedule(lr_per_mb, epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9)
    l2_reg_weight = 0.0001  # CNTK L2 regularization is per sample, thus same as Caffe

    # Create learner
    local_learner = momentum_sgd(network['output'].parameters, lr_schedule,
                                 mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)
    parameter_learner = data_parallel_distributed_learner(
        local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    # Create trainer
    return Trainer(network['output'], (network['ce'], network['pe']),
                   parameter_learner, progress_printer)

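# A minimal usage sketch for the create_trainer variants above. The reader
# construction and the stream/input names ('features', 'labels', and the
# network's 'feature'/'label' nodes) are assumptions for illustration; only
# the trainer/session wiring follows the functions above.
import cntk as C
from cntk.train.distributed import Communicator

def train_model(network, reader, trainer, epoch_size, num_epochs,
                minibatch_size):
    input_map = {
        network['feature']: reader.streams.features,  # assumed names
        network['label']: reader.streams.labels
    }
    C.training_session(trainer=trainer,
                       mb_source=reader,
                       mb_size=minibatch_size,
                       model_inputs_to_streams=input_map,
                       progress_frequency=epoch_size,
                       max_samples=epoch_size * num_epochs).train()
    Communicator.finalize()  # required once after distributed training
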
def train_and_test(s2smodel, train_reader, test_reader, block_size,
                   num_quantization_bits, max_epochs, epoch_size,
                   minibatch_size, progress_printer, warm_up):
    from Sequence2Sequence import create_criterion_function, create_model_train
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option."
        )

    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    local_learner = fsadagrad(
        model_train.parameters,
        lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner,
                                                     block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    trainer = Trainer(None, criterion, learner, progress_printer)

    train_bind = {
        criterion.arguments[0]: train_reader.streams.features,
        criterion.arguments[1]: train_reader.streams.labels
    }

    training_session(
        mb_source=train_reader,
        trainer=trainer,
        model_inputs_to_streams=train_bind,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(
            frequency=epoch_size,
            filename=os.path.join(model_path, "SequenceToSequence"),
            restore=False),
        cv_config=CrossValidationConfig(source=test_reader,
                                        mb_size=minibatch_size)).train()

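# Notes on the learner above, for reference:
# - learning_rate_schedule([lr]*2 + [lr/2]*3 + [lr/4], UnitType.sample,
#   epoch_size) applies lr for the first 2 epochs, lr/2 for the next 3, and
#   lr/4 thereafter, with rates interpreted per sample.
# - The distributed learners only synchronize across workers when the script
#   is launched with one MPI process per worker, e.g. (illustrative command,
#   script name hypothetical):
#       mpiexec -n 4 python <training_script>.py
#   With a single process they behave like the wrapped local learner.
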
def train(network, location_path, id):
    train_path = os.path.join(opt.datadir, opt.train_file)
    valid_path = os.path.join(opt.datadir, opt.valid_file)
    test_path = os.path.join(opt.datadir, opt.test_file)

    criterion = create_criterion(network)
    ce, pe = criterion[0], criterion[1]

    learner = create_learner(network['model'])
    learner = data_parallel_distributed_learner(learner)
    communicator = learner.communicator()
    trainer = C.Trainer(network['model'], (ce, pe), learner)

    # Loop over epochs
    for epoch in range(opt.epochs[id]):
        source = DataSource(train_path, opt.vocab_file, location_path,
                            opt.seqlength, opt.batchsize)
        loss, metric, tokens, batch_id = 0, 0, 0, 0
        start_time = datetime.datetime.now()
        flag = True

        # Loop over minibatches in the epoch
        while flag:
            mb = source.next_minibatch(
                opt.seqlength * opt.batchsize * Communicator.num_workers(),
                Communicator.num_workers(), communicator.rank())
            trainer.train_minibatch({
                network['row']: mb[source.input1],
                network['col']: mb[source.input2],
                network['row_label']: mb[source.label1],
                network['col_label']: mb[source.label2]
            })
            samples = trainer.previous_minibatch_sample_count
            loss += trainer.previous_minibatch_loss_average * samples
            metric += trainer.previous_minibatch_evaluation_average * samples
            tokens += samples
            batch_id += 1
            if Communicator.num_workers() > 1:
                communicator.barrier()
            if batch_id != 0 and batch_id % opt.freq == 0:
                diff_time = datetime.datetime.now() - start_time
                print("Epoch {:2}: Minibatch [{:5} - {:5}], loss = {:.6f}, "
                      "error = {:.6f}, speed = {:3} tokens/s".format(
                          epoch + 1, batch_id - opt.freq + 1, batch_id,
                          loss / tokens, metric / tokens,
                          tokens // diff_time.seconds))
            flag = not mb[source.input1].sweep_end

        # Evaluation action
        if communicator.is_main():
            valid_error = evaluate(network, valid_path, location_path)
            test_error = evaluate(network, test_path, location_path)
            print("Epoch {:2} Done : Valid error = {:.6f}, "
                  "Test error = {:.6f}".format(epoch + 1, valid_error,
                                               test_error))
            network['model'].save(
                os.path.join(opt.outputdir,
                             'round{}_epoch{}_'.format(id, epoch) + opt.save))
        if Communicator.num_workers() > 1:
            communicator.barrier()

    # Word allocation action
    row_loss, col_loss = calculate_loss_vector(network, train_path,
                                               location_path, communicator)
    if Communicator.num_workers() > 1:
        try:
            from mpi4py import MPI
        except ImportError:
            raise RuntimeError("Please install mpi4py to train with multiple GPUs!")
        comm = MPI.COMM_WORLD
        # Aggregate the per-worker loss vectors on the main worker
        if communicator.is_main():
            for i in range(1, Communicator.num_workers()):
                row_loss_i, col_loss_i = comm.recv(source=i)
                row_loss += row_loss_i
                col_loss += col_loss_i
        else:
            data_send = [row_loss, col_loss]
            comm.send(data_send, 0)
        communicator.barrier()

    if communicator.is_main():
        allocate_table(row_loss, col_loss, opt.vocabsize, vocab_sqrt,
                       opt.vocab_file, get_k_round_location_path(id + 1))

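# A more compact aggregation (a sketch, not the function's actual code): since
# row_loss/col_loss support elementwise '+', mpi4py's lowercase allreduce can
# sum them across workers on every rank, removing the is_main() branching.
def aggregate_losses(row_loss, col_loss):
    try:
        from mpi4py import MPI
    except ImportError:
        raise RuntimeError("Please install mpi4py to aggregate across workers!")
    comm = MPI.COMM_WORLD
    # allreduce pickles the objects and applies MPI.SUM via Python '+'
    return (comm.allreduce(row_loss, op=MPI.SUM),
            comm.allreduce(col_loss, op=MPI.SUM))
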
def train(input_dir, output_dir, num_epochs):
    ''' Coordinates model creation and training; minibatch creation '''
    num_landcover_classes = 5
    num_color_channels = 4
    block_size = 256
    padding = int(block_size / 4)

    my_rank = distributed.Communicator.rank()
    number_of_workers = distributed.Communicator.num_workers()
    os.makedirs(output_dir, exist_ok=True)

    # We extract 160 minibatches of sample regions from an input image before
    # moving along to the next image file. Our epoch size is 16,000 samples.
    minibatch_size = 10
    minibatches_per_image = 160
    minibatches_per_epoch = 1600
    epoch_size = minibatch_size * minibatches_per_epoch

    # Define the input variables
    f_dim = (num_color_channels, block_size, block_size)
    l_dim = (num_landcover_classes, block_size, block_size)
    feature = cntk.input_variable(f_dim, np.float32)
    label = cntk.input_variable(l_dim, np.float32)

    # Define the minibatch source
    minibatch_source = MyDataSource(f_dim, l_dim, number_of_workers, input_dir,
                                    minibatches_per_image)
    input_map = {
        feature: minibatch_source.streams.features,
        label: minibatch_source.streams.labels
    }

    # Define the model
    model = model_mini_pub.model(num_landcover_classes, block_size, 2,
                                 [64, 32, 32, 32])(feature)

    # Define the loss function and metric. Note that loss is not computed
    # directly on the model's output; the edges are first dropped.
    output = center_square(
        cntk.reshape(model, (num_landcover_classes, block_size, block_size)),
        block_size, padding)
    label_center = center_square(label, block_size, padding)
    mean_ce, pe = criteria(label_center, output, block_size,
                           num_landcover_classes, [0.0, 1.0, 1.0, 1.0, 1.0])

    # Create the progress writer, learner, and trainer (which will be a
    # distributed trainer if number_of_workers > 1)
    progress_writers = [
        cntk.logging.progress_print.ProgressPrinter(tag='Training',
                                                    num_epochs=num_epochs,
                                                    freq=epoch_size,
                                                    rank=my_rank)
    ]
    lr_per_mb = [0.0001] * 30 + [0.00001] * 30 + [0.000001]
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = cntk.learning_rate_schedule(lr_per_sample,
                                              epoch_size=epoch_size,
                                              unit=cntk.UnitType.sample)
    learner = cntk.rmsprop(model.parameters, lr_schedule, 0.95, 1.1, 0.9, 1.1,
                           0.9, l2_regularization_weight=0.00001)
    if number_of_workers > 1:
        parameter_learner = distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=32)
        trainer = cntk.Trainer(output, (mean_ce, pe), parameter_learner,
                               progress_writers)
    else:
        trainer = cntk.Trainer(output, (mean_ce, pe), learner,
                               progress_writers)

    # Perform the training! Note that some progress output will be generated
    # by each of the workers.
    if my_rank == 0:
        print('Retraining model for {} epochs.'.format(num_epochs))
        print('Found {} workers'.format(number_of_workers))
        print('Printing progress every {} minibatches'.format(
            minibatches_per_epoch))
    cntk.logging.progress_print.log_number_of_parameters(model)
    training_session(
        trainer=trainer,
        max_samples=num_epochs * epoch_size,
        mb_source=minibatch_source,
        mb_size=minibatch_size,
        model_inputs_to_streams=input_map,
        checkpoint_config=CheckpointConfig(
            frequency=epoch_size,
            filename=os.path.join(output_dir, 'trained_checkpoint.model'),
            preserve_all=True),
        progress_frequency=epoch_size).train()

    distributed.Communicator.finalize()
    if my_rank == 0:
        trainer.model.save(os.path.join(output_dir, 'trained.model'))
    return

def retrain_model(map_filename, output_dir, num_classes, epoch_size,
                  model_filename, num_epochs, model_type, retraining_type):
    ''' Coordinates retraining after MAP file creation '''

    # Load the minibatch source and model
    minibatch_source = create_minibatch_source(map_filename, num_classes)
    image_input = cntk.ops.input_variable((3, 224, 224))
    label_input = cntk.ops.input_variable((num_classes))
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels
    }
    if model_type == 'alexnet':
        model = load_alexnet_model(image_input, num_classes, model_filename,
                                   retraining_type)
    elif model_type == 'resnet18':
        model = load_resnet18_model(image_input, num_classes, model_filename,
                                    retraining_type)

    # Set learning parameters
    ce = cntk.losses.cross_entropy_with_softmax(model, label_input)
    pe = cntk.metrics.classification_error(model, label_input)
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 33 + [0.000001] * 33 + [0.0000001]
    momentum_time_constant = 10
    mb_size = 16
    lr_schedule = cntk.learners.learning_rate_schedule(
        lr_per_sample, unit=cntk.UnitType.sample)
    mm_schedule = cntk.learners.momentum_as_time_constant_schedule(
        momentum_time_constant)

    # Instantiate the appropriate trainer object
    my_rank = distributed.Communicator.rank()
    num_workers = distributed.Communicator.num_workers()
    num_minibatches = int(np.ceil(epoch_size / mb_size))
    progress_writers = [
        cntk.logging.progress_print.ProgressPrinter(tag='Training',
                                                    num_epochs=num_epochs,
                                                    freq=num_minibatches,
                                                    rank=my_rank)
    ]
    learner = cntk.learners.fsadagrad(parameters=model.parameters,
                                      lr=lr_schedule,
                                      momentum=mm_schedule,
                                      l2_regularization_weight=l2_reg_weight)
    if num_workers > 1:
        parameter_learner = distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=32)
        trainer = cntk.Trainer(model, (ce, pe), parameter_learner,
                               progress_writers)
    else:
        trainer = cntk.Trainer(model, (ce, pe), learner, progress_writers)

    # Print summary lines to stdout and perform training
    if my_rank == 0:
        print('Retraining model for {} epochs.'.format(num_epochs))
        print('Found {} workers'.format(num_workers))
        print('Printing progress every {} minibatches'.format(num_minibatches))
    cntk.logging.progress_print.log_number_of_parameters(model)
    training_session(
        trainer=trainer,
        max_samples=num_epochs * epoch_size,
        mb_source=minibatch_source,
        mb_size=mb_size,
        model_inputs_to_streams=input_map,
        checkpoint_config=CheckpointConfig(
            frequency=epoch_size,
            filename=os.path.join(output_dir, 'retrained_checkpoint.model')),
        progress_frequency=epoch_size).train()

    distributed.Communicator.finalize()
    if my_rank == 0:
        trainer.model.save(os.path.join(output_dir, 'retrained.model'))
    return my_rank

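# Background for momentum_as_time_constant_schedule above (a worked note, not
# code from the snippets): a time constant tc corresponds to a per-sample
# momentum of exp(-1/tc).
import math

def momentum_from_time_constant(time_constant):
    """Per-sample momentum equivalent of a CNTK momentum time constant."""
    return math.exp(-1.0 / time_constant)

# momentum_from_time_constant(10)   -> ~0.9048 (the retraining learner above)
# momentum_from_time_constant(1100) -> ~0.9991 (the sequence-to-sequence learner)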