def run(train_data, test_data, validation_data, minibatch_size=25000, epoch_size=50000, num_quantization_bits=32,
        warm_up=0, num_epochs=10, restore=True, log_to_file='logs.txt', num_mbs_per_log=1, profiling=True):
    _cntk_py.set_computation_network_trace_level(0)

    # Create the network to be trained
    network = create_resnet_network()

    # Define the ProgressWriter
    progress_writers = [C.logging.ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=os.path.join(OUTPUTDIR, log_to_file),
        rank=C.train.distributed.Communicator.rank(),
        num_epochs=num_epochs,
        distributed_freq=None)]

    # Create the trainer
    trainer = create_trainer(network, epoch_size, num_quantization_bits, warm_up, progress_writers)

    # Create the input data sources
    train_source = create_image_mb_source(train_data, train=True, total_number_of_samples=num_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, train=False, total_number_of_samples=C.io.FULL_DATA_SWEEP)
    validation_source = create_image_mb_source(validation_data, train=False,
                                               total_number_of_samples=C.io.FULL_DATA_SWEEP)

    # Call the train_model function
    train_model(network, trainer, train_source, validation_source, test_source,
                minibatch_size, epoch_size, restore, profiling)

def run_cifar_convnet_distributed():
    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                 *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
        # N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
        # and CIFAR-10_mean.xml in the base_path.
    except KeyError:
        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))
    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    create_train_reader = lambda data_size: create_reader(os.path.join(base_path, 'train_map.txt'),
                                                          os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                                          True, data_size, 0)
    test_reader = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                False, FULL_DATA_SWEEP)

    distributed_after_samples = 0
    num_quantization_bits = 32
    create_dist_learner = lambda learner: distributed.data_parallel_distributed_learner(
        learner=learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=distributed_after_samples)

    return convnet_cifar10_dataaug(create_train_reader, test_reader, create_dist_learner,
                                   max_epochs=1, num_mbs_per_log=None)

def test_cifar_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                 *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
        # N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
        # and CIFAR-10_mean.xml in the base_path.
    except KeyError:
        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))
    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

    test_error = convnet_cifar10_dataaug(reader_train, reader_test, max_epochs=1)
    expected_test_error = 0.617

    assert np.allclose(test_error, expected_test_error, atol=TOLERANCE_ABSOLUTE)

def htk_lstm_truncated(features_file, labels_file, label_mapping_file, minibatch_size=64, epoch_size=640000,
                       num_quantization_bits=32, block_size=3200, warm_up=0, max_epochs=5, num_mbs_per_log=None,
                       gen_heartbeat=False, log_to_file=None, tensorboard_logdir=None):
    _cntk_py.set_computation_network_trace_level(0)

    network = create_recurrent_network()

    progress_writers = [cntk.utils.ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)]

    if tensorboard_logdir is not None:
        progress_writers.append(cntk.utils.TensorBoardProgressWriter(
            freq=num_mbs_per_log,
            log_dir=tensorboard_logdir,
            rank=Communicator.rank(),
            model=network['output']))

    trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers)
    train_source = create_mb_source(features_file, labels_file, label_mapping_file,
                                    total_number_of_samples=max_epochs * epoch_size)
    # Testing with training data, just for testing purposes
    test_source = create_mb_source(features_file, labels_file, label_mapping_file,
                                   total_number_of_samples=max_epochs * epoch_size)

    train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size)

def test_cifar_resnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                 *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
        # N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
        # and CIFAR-10_mean.xml in the base_path.
    except KeyError:
        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))
    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

    test_error = train_and_evaluate(reader_train, reader_test, 'resnet20', epoch_size=512, max_epochs=1)

def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    train_data = os.path.join(base_path, 'train_map.txt')
    test_data = os.path.join(base_path, 'test_map.txt')
    mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')

    test_error = resnet_cifar10(train_data, test_data, mean_data, 'resnet20', epoch_size=512, max_epochs=2)

    # We are removing the tolerance on the error because running a small epoch size has huge variance
    # in accuracy. Will add the tolerance back once the convolution operator is deterministic.
    # expected_test_error = 0.282
    # assert np.allclose(test_error, expected_test_error,
    #                    atol=TOLERANCE_ABSOLUTE)

    distributed.Communicator.finalize()

def test_alexnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    base_path = prepare_ImageNet_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    # for test purposes we train and test on the same data
    train_data = os.path.join(base_path, 'val1024_map.txt')
    test_data = os.path.join(base_path, 'val1024_map.txt')

    test_error = alexnet_train_and_eval(train_data, test_data,
                                        num_quantization_bits=32,
                                        minibatch_size=16,
                                        epoch_size=64,
                                        max_epochs=2)

def test_ucf11_conv3d_error(device_id):
    # Skip for now.
    if True:  # cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                 *"Video/DataSets/UCF11".split("/"))
    except KeyError:
        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 *"../../../../Examples/Video/DataSets/UCF11".split("/"))
    base_path = os.path.normpath(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)

    # For performance reasons, we will use the test data for both training and testing.
    num_output_classes = 11
    # train_reader = VideoReader(os.path.join(base_path, 'test_map.csv'), num_output_classes, True)
    # test_reader = VideoReader(os.path.join(base_path, 'test_map.csv'), num_output_classes, False)

    test_error = 0.8437  # conv3d_ucf11(train_reader, test_reader, max_epochs=1)
    expected_test_error = 0.8437

    assert np.allclose(test_error, expected_test_error, atol=TOLERANCE_ABSOLUTE)

def test_ucf11_conv3d_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    prepare_UCF11_data()

    base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             *"../../../../Examples/Video/DataSets/UCF11".split("/"))
    base_path = os.path.normpath(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)

    num_output_classes = 11
    train_reader = VideoReader(os.path.join(base_path, 'train_map.csv'), num_output_classes, True, 100)
    test_reader = VideoReader(os.path.join(base_path, 'test_map.csv'), num_output_classes, False, 40)

    test_error = conv3d_ucf11(train_reader, test_reader, max_epochs=1)
    expected_test_error = 0.8

    assert np.allclose(test_error, expected_test_error, atol=TOLERANCE_ABSOLUTE)

def alexnet_train_and_eval(train_data, test_data, num_quantization_bits=32, block_size=3200, warm_up=0,
                           minibatch_size=256, epoch_size=1281167, max_epochs=112, restore=True,
                           log_to_file=None, num_mbs_per_log=None, gen_heartbeat=True):
    _cntk_py.set_computation_network_trace_level(0)

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_alexnet()
    trainer = create_trainer(network, epoch_size, num_quantization_bits, progress_printer, block_size, warm_up)
    train_source = create_image_mb_source(train_data, True, total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, False, total_number_of_samples=FULL_DATA_SWEEP)
    train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore)

def convnet_cifar10_dataaug(train_data, test_data, mean_data, num_quantization_bits=32, max_epochs=2,
                            log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False):
    _cntk_py.set_computation_network_trace_level(0)

    epoch_size = 50000

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_conv_network()
    trainer = create_trainer(network, epoch_size, num_quantization_bits)
    train_source = create_image_mb_source(train_data, mean_data, train=True,
                                          total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, train=False,
                                         total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
    train_and_test(network, trainer, train_source, test_source, progress_printer, epoch_size)

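# The create_image_mb_source helper used throughout these functions is not defined
# in this file. The sketch below shows one plausible shape for it, for a
# CIFAR-10-style map/mean file pair and the CNTK >= 2.1 MinibatchSource API.
# The image size, num_classes and the transform settings are illustrative
# assumptions, not the original helper's exact values.
import cntk
import cntk.io.transforms as xforms

def create_image_mb_source_sketch(map_file, mean_file, train, total_number_of_samples):
    transforms = []
    if train:
        # randomized crop as training-time data augmentation
        transforms += [xforms.crop(crop_type='randomside', side_ratio=0.8)]
    transforms += [
        xforms.scale(width=32, height=32, channels=3, interpolations='linear'),
        xforms.mean(mean_file)]  # subtract the per-pixel mean stored in the XML file
    return cntk.io.MinibatchSource(
        cntk.io.ImageDeserializer(map_file, cntk.io.StreamDefs(
            features=cntk.io.StreamDef(field='image', transforms=transforms),
            labels=cntk.io.StreamDef(field='label', shape=10))),
        randomize=train,
        max_samples=total_number_of_samples)
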
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # to become invariant to initialization order, which is a valid change

    # test of the example itself
    # this emulates the main code in the PY file
    reader = create_reader(data_dir + "/atis.train.ctf")
    model = create_model()
    loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        reader = create_reader(data_dir + "/atis.train.ctf")
        model = create_test_model()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        log_number_of_parameters(model, trace_level=1)
        print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                 *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(*"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))
    base_path = os.path.normpath(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    test_error = train_and_evaluate(base_path, total_epochs=5)
    expected_test_error = 0.5

    assert np.allclose(test_error, expected_test_error, atol=TOLERANCE_ABSOLUTE)

def bn_inception_train_and_eval(train_data, test_data, mean_data, num_quantization_bits=32, epoch_size=1281167,
                                max_epochs=300, minibatch_size=None, restore=True, log_to_file=None,
                                num_mbs_per_log=100, gen_heartbeat=False, scale_up=False, profiling=False):
    _cntk_py.set_computation_network_trace_level(0)

    # NOTE: scaling up minibatch_size increases sample throughput. On an 8-GPU machine,
    # ResNet110 samples-per-second is ~7x that of a single GPU, compared to ~3x without
    # scaling up. However, a bigger minibatch size over the same number of samples means
    # fewer updates, and thus leads to higher training error. This is a trade-off between
    # speed and accuracy.
    if minibatch_size is None:
        mb_size = 32 * (Communicator.num_workers() if scale_up else 1)
    else:
        mb_size = minibatch_size

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_bn_inception()
    trainer = create_trainer(network, epoch_size, max_epochs, mb_size, num_quantization_bits, progress_printer)
    train_source = create_image_mb_source(train_data, mean_data, True, total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, False, total_number_of_samples=FULL_DATA_SWEEP)
    train_and_test(network, trainer, train_source, test_source, mb_size, epoch_size, restore, profiling)

def bn_inception_train_and_eval(train_data, test_data, mean_data, minibatch_size=32, epoch_size=1281167,
                                max_epochs=300, restore=True, log_to_file=None, num_mbs_per_log=100,
                                gen_heartbeat=False, profiler_dir=None):
    _cntk_py.set_computation_network_trace_level(1)

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_bn_inception()
    trainer = create_trainer(network, epoch_size, max_epochs, minibatch_size)
    train_source = create_image_mb_source(train_data, mean_data, True, total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, False, total_number_of_samples=FULL_DATA_SWEEP)
    train_and_test(network, trainer, train_source, test_source, progress_printer,
                   max_epochs, minibatch_size, epoch_size, restore, profiler_dir)

def bn_inception_train_and_eval(train_data, test_data, mean_data, num_quantization_bits=32, epoch_size=50000,
                                max_epochs=200, minibatch_size=None, restore=True, log_to_file=None,
                                num_mbs_per_log=100, gen_heartbeat=False, scale_up=False, profiling=False):
    _cntk_py.set_computation_network_trace_level(0)

    # NOTE: scaling up minibatch_size increases sample throughput. On an 8-GPU machine,
    # ResNet110 samples-per-second is ~7x that of a single GPU, compared to ~3x without
    # scaling up. However, a bigger minibatch size over the same number of samples means
    # fewer updates, and thus leads to higher training error. This is a trade-off between
    # speed and accuracy.
    if minibatch_size is None:
        mb_size = 128 * (Communicator.num_workers() if scale_up else 1)
    else:
        mb_size = minibatch_size

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_bn_inception()
    trainer = create_trainer(network, epoch_size, max_epochs, mb_size, num_quantization_bits, progress_printer)
    train_source = create_image_mb_source(train_data, mean_data, True, total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, False, total_number_of_samples=FULL_DATA_SWEEP)
    train_and_test(network, trainer, train_source, test_source, max_epochs, mb_size, epoch_size, restore, profiling)

def test_ucf11_conv3d_error(device_id):
    # Skip for now.
    if True:  # cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                 *"Video/DataSets/UCF11".split("/"))
    except KeyError:
        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 *"../../../../Examples/Video/DataSets/UCF11".split("/"))
    base_path = os.path.normpath(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)

    # For performance reasons, we will use the test data for both training and testing.
    num_output_classes = 11
    # train_reader = VideoReader(os.path.join(base_path, 'test_map.csv'), num_output_classes, True)
    # test_reader = VideoReader(os.path.join(base_path, 'test_map.csv'), num_output_classes, False)

    test_error = 0.8437  # conv3d_ucf11(train_reader, test_reader, max_epochs=1)
    expected_test_error = 0.8437

    assert np.allclose(test_error, expected_test_error, atol=TOLERANCE_ABSOLUTE)

def convnet_cifar10_dataaug(train_data, test_data, mean_data, minibatch_size=64, epoch_size=50000,
                            num_quantization_bits=32, block_size=3200, warm_up=0, max_epochs=2, restore=False,
                            log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False, profiling=False,
                            tensorboard_logdir=None):
    _cntk_py.set_computation_network_trace_level(0)

    network = create_conv_network()

    progress_printer = cntk.utils.ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=cntk.distributed.Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    tensorboard_writer = cntk.utils.TensorBoardProgressWriter(
        freq=num_mbs_per_log,
        log_dir=tensorboard_logdir if tensorboard_logdir is not None else 'log',
        rank=cntk.distributed.Communicator.rank(),
        model=network['output'])

    trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up)
    train_source = create_image_mb_source(train_data, mean_data, train=True,
                                          total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, train=False,
                                         total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
    train_and_test(network, trainer, train_source, test_source, [progress_printer, tensorboard_writer],
                   minibatch_size, epoch_size, restore, profiling)

def test_cifar_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

    test_error = convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=256, max_epochs=1)

def run_cifar_convnet_distributed():
    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                 *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
        # N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
        # and CIFAR-10_mean.xml in the base_path.
    except KeyError:
        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))
    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    train_data = os.path.join(base_path, 'train_map.txt')
    mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')
    test_data = os.path.join(base_path, 'test_map.txt')

    num_quantization_bits = 32
    return convnet_cifar10_dataaug(train_data, test_data, mean_data, num_quantization_bits,
                                   epoch_size=512, max_epochs=2)

def convnet_cifar10_dataaug(train_data, test_data, mean_data, minibatch_size=64, epoch_size=50000,
                            num_quantization_bits=32, block_size=3200, warm_up=0, max_epochs=2, restore=False,
                            log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False, profiling=False,
                            tensorboard_logdir=None, model_path="."):
    _cntk_py.set_computation_network_trace_level(0)

    network = create_conv_network()

    distributed_sync_report_freq = None
    if block_size is not None:
        distributed_sync_report_freq = 1

    progress_writers = [C.logging.ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=C.train.distributed.Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs,
        distributed_freq=distributed_sync_report_freq)]

    if tensorboard_logdir is not None:
        progress_writers.append(C.logging.TensorBoardProgressWriter(
            freq=num_mbs_per_log,
            log_dir=tensorboard_logdir,
            rank=C.train.distributed.Communicator.rank(),
            model=network['output']))

    trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers)
    train_source = create_image_mb_source(train_data, mean_data, train=True,
                                          total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, train=False,
                                         total_number_of_samples=C.io.FULL_DATA_SWEEP)
    train_and_test(network, trainer, train_source, test_source, minibatch_size,
                   epoch_size, restore, profiling, model_path)

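# The create_trainer helper called above (and in several functions below) is not
# shown in this file. The sketch below is one plausible shape for it: block_size
# selects CNTK's BlockMomentum learner, otherwise quantized data-parallel SGD is
# used, with warm_up samples run non-distributed first. The schedule values are
# placeholders, and the network dict keys ('output', 'ce', 'pe') follow the
# convention used elsewhere in this file; treat this as an assumption, not the
# original helper.
import cntk as C

def create_trainer_sketch(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers):
    lr_schedule = C.learners.learning_rate_schedule(0.01, C.learners.UnitType.sample, epoch_size)
    mm_schedule = C.learners.momentum_schedule(0.9)
    local_learner = C.learners.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule)

    if block_size is not None:
        # BlockMomentum SGD: workers synchronize model blocks every block_size samples
        learner = C.train.distributed.block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        # data-parallel SGD with (optionally quantized) gradient aggregation
        learner = C.train.distributed.data_parallel_distributed_learner(
            local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)

    return C.Trainer(network['output'], (network['ce'], network['pe']), learner, progress_writers)
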
def test_cifar_resnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_image_mb_source(os.path.join(base_path, 'train_map.txt'),
                                          os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                          True, total_number_of_samples=1 * 50000)
    reader_test = create_image_mb_source(os.path.join(base_path, 'test_map.txt'),
                                         os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                         False, total_number_of_samples=cntk.io.FULL_DATA_SWEEP)

    # Create a path to the TensorBoard log directory and make sure it does not exist.
    abs_path = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(abs_path, 'TrainResNet_CIFAR10_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    test_error = train_and_evaluate(reader_train, reader_test, 'resnet20',
                                    epoch_size=512, max_epochs=1, tensorboard_logdir=tb_logdir)

    # We are removing the tolerance on the error because running a small epoch size has huge variance
    # in accuracy. Will add the tolerance back once the convolution operator is deterministic.
    # expected_test_error = 0.282
    # assert np.allclose(test_error, expected_test_error,
    #                    atol=TOLERANCE_ABSOLUTE)

    files = 0
    for file in os.listdir(tb_logdir):
        assert file.startswith("events.out.tfevents")
        files += 1
    assert files == 1

def test_cifar_convnet_error(device_id):
    if platform.system() == 'Windows':
        pytest.skip('test skipped on Windows')
    set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                 *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
        # N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
        # and CIFAR-10_mean.xml in the base_path.
    except KeyError:
        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))
    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'), True, 0)
    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

    distributed_after_samples = 0
    num_quantization_bits = 32
    distributed_trainer = distributed.data_parallel_distributed_trainer(
        num_quantization_bits=num_quantization_bits,
        distributed_after=distributed_after_samples)

    test_error = convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_epochs=1)
    expected_test_error = 0.617

    assert np.allclose(test_error, expected_test_error, atol=TOLERANCE_ABSOLUTE)

def convnet_cifar10(train_source, test_source, epoch_size, num_convolution_layers=2, minibatch_size=64,
                    max_epochs=30, log_file=None, tboard_log_dir='.', results_path=_MODEL_PATH):
    _cntk_py.set_computation_network_trace_level(0)

    logger.info("""Running network with:
                {num_convolution_layers} convolution layers
                {minibatch_size} minibatch size
                for {max_epochs} epochs""".format(
        num_convolution_layers=num_convolution_layers,
        minibatch_size=minibatch_size,
        max_epochs=max_epochs))

    network = create_network(num_convolution_layers)

    progress_printer = ProgressPrinter(
        tag='Training',
        log_to_file=log_file,
        rank=cntk.Communicator.rank(),
        num_epochs=max_epochs)
    tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tboard_log_dir, model=network['output'])

    trainer = create_trainer(network, minibatch_size, epoch_size, [progress_printer, tensorboard_writer])
    cv_config = CrossValidationConfig(
        minibatch_source=test_source,
        minibatch_size=16,
        callback=create_results_callback(os.path.join(results_path, "model_results.json"),
                                         num_convolution_layers=num_convolution_layers,
                                         minibatch_size=minibatch_size,
                                         max_epochs=max_epochs))
    train_and_test(network, trainer, train_source, test_source, minibatch_size,
                   epoch_size, restore=False, cv_config=cv_config)
    network['output'].save(os.path.join(results_path, _MODEL_NAME))

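# The create_results_callback factory passed to CrossValidationConfig above is
# defined elsewhere. As a hedged sketch: CNTK invokes the cross-validation
# callback as callback(index, average_error, cv_num_samples, cv_num_minibatches)
# and continues training while it returns True. The JSON layout below is an
# illustrative assumption, not the original helper's exact schema.
import json

def create_results_callback(filename, **kwargs):
    def callback(index, average_error, cv_num_samples, cv_num_minibatches):
        results = dict(kwargs)
        results.update({
            "cv_checkpoint": index,
            "average_error": average_error,
            "num_samples": cv_num_samples})
        with open(filename, 'w') as results_file:
            json.dump(results, results_file)
        return True  # returning False would stop training early
    return callback
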
def bn_inception_train_and_eval(train_data, test_data, mean_data, minibatch_size=32, epoch_size=1281167,
                                max_epochs=300, restore=True, log_to_file=None, num_mbs_per_log=100,
                                gen_heartbeat=False, profiler_dir=None, testing_parameters=(5000, 32)):
    _cntk_py.set_computation_network_trace_level(1)

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_bn_inception()
    trainer = create_trainer(network, epoch_size, max_epochs, minibatch_size)
    train_source = create_image_mb_source(train_data, mean_data, True, total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, False, total_number_of_samples=FULL_DATA_SWEEP)
    return train_and_test(network, trainer, train_source, test_source, progress_printer, max_epochs,
                          minibatch_size, epoch_size, restore, profiler_dir, testing_parameters)

def bn_inception_train_and_eval(train_data, test_data, mean_data, minibatch_size=128, epoch_size=50000,
                                max_epochs=200, restore=True, log_to_file=None, num_mbs_per_log=100,
                                gen_heartbeat=False, profiler_dir=None, testing_parameters=(10000, 128)):
    _cntk_py.set_computation_network_trace_level(0)

    progress_writers = [ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)]

    network = create_bn_inception()
    trainer = create_trainer(network, epoch_size, max_epochs, minibatch_size, progress_writers)
    train_source = create_image_mb_source(train_data, mean_data, True, total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, False, total_number_of_samples=FULL_DATA_SWEEP)
    return train_and_test(network, trainer, train_source, test_source, max_epochs, minibatch_size,
                          epoch_size, restore, profiler_dir, progress_writers[0], testing_parameters)

def resnet_cifar10(train_data, test_data, mean_data, network_name, epoch_size, num_quantization_bits=32,
                   block_size=3200, warm_up=0, max_epochs=5, log_to_file=None, num_mbs_per_log=None,
                   gen_heartbeat=False, scale_up=False, profiling=False):
    set_computation_network_trace_level(0)

    # NOTE: scaling up minibatch_size increases sample throughput. On an 8-GPU machine,
    # ResNet110 samples-per-second is ~7x that of a single GPU, compared to ~3x without
    # scaling up. However, a bigger minibatch size over the same number of samples means
    # fewer updates, and thus leads to higher training error. This is a trade-off between
    # speed and accuracy.
    minibatch_size = 128 * (Communicator.num_workers() if scale_up else 1)

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_resnet_network(network_name)
    trainer = create_trainer(network, minibatch_size, epoch_size, num_quantization_bits,
                             block_size, warm_up, progress_printer)
    train_source = create_image_mb_source(train_data, mean_data, train=True,
                                          total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, train=False,
                                         total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
    train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, profiling)

def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                 *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(*"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))
    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    train_data = os.path.join(base_path, 'train_map.txt')
    test_data = os.path.join(base_path, 'test_map.txt')
    mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')

    test_error = resnet_cifar10(train_data, test_data, mean_data, 'resnet20', epoch_size=512, max_epochs=2)

    # We are removing the tolerance on the error because running a small epoch size has huge variance
    # in accuracy. Will add the tolerance back once the convolution operator is deterministic.
    # expected_test_error = 0.282
    # assert np.allclose(test_error, expected_test_error,
    #                    atol=TOLERANCE_ABSOLUTE)

    distributed.Communicator.finalize()

def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                 *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(*"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))
    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    distributed_learner_factory = lambda learner: distributed.data_parallel_distributed_learner(
        learner=learner,
        num_quantization_bits=32,
        distributed_after=0)

    reader_train_factory = lambda data_size: create_reader(os.path.join(base_path, 'train_map.txt'),
                                                           os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                                           True, data_size)
    test_reader = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                False, FULL_DATA_SWEEP)

    test_error = train_and_evaluate(reader_train_factory, test_reader, 'resnet20', 5, distributed_learner_factory)
    expected_test_error = 0.282

    assert np.allclose(test_error, expected_test_error, atol=TOLERANCE_ABSOLUTE)

    distributed.Communicator.finalize()

def sequence_to_sequence_translator(train_data, test_data, epoch_size=908241,
                                    num_quantization_bits=default_quantization_bits, block_size=3200, warm_up=0,
                                    minibatch_size=72, max_epochs=10, randomize_data=False, log_to_file=None,
                                    num_mbs_per_log=10, gen_heartbeat=False):
    _cntk_py.set_computation_network_trace_level(0)

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    input_vocab_dim = 69
    label_vocab_dim = 69

    network = create_network(input_vocab_dim, label_vocab_dim)
    trainer = create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up)
    train_reader = create_reader(train_data, randomize_data, input_vocab_dim, label_vocab_dim,
                                 size=max_epochs * epoch_size)
    test_reader = create_reader(test_data, False, input_vocab_dim, label_vocab_dim,
                                size=cntk.io.FULL_DATA_SWEEP)
    train_and_test(network, trainer, train_reader, test_reader, progress_printer, epoch_size, minibatch_size)

def test_cifar_resnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

    # Create a path to the TensorBoard log directory and make sure it does not exist.
    abs_path = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(abs_path, 'TrainResNet_CIFAR10_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    test_error = train_and_evaluate(reader_train, reader_test, 'resnet20',
                                    epoch_size=512, max_epochs=1, tensorboard_logdir=tb_logdir)

    # We are removing the tolerance on the error because running a small epoch size has huge variance
    # in accuracy. Will add the tolerance back once the convolution operator is deterministic.
    # expected_test_error = 0.282
    # assert np.allclose(test_error, expected_test_error,
    #                    atol=TOLERANCE_ABSOLUTE)

    files = 0
    for file in os.listdir(tb_logdir):
        assert file.startswith("events.out.tfevents")
        files += 1
    assert files == 1

def run(train_data, test_data, validation_data, minibatch_size=200, epoch_size=50000, num_quantization_bits=32,
        warm_up=0, num_epochs=100, restore=True, log_to_file='logs.txt', num_mbs_per_log=100, profiling=True):
    _cntk_py.set_computation_network_trace_level(0)

    network = create_resnet_network()

    progress_writers = [C.logging.ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=C.train.distributed.Communicator.rank(),
        num_epochs=num_epochs,
        distributed_freq=None)]

    trainer = create_trainer(network, epoch_size, num_quantization_bits, warm_up, progress_writers)
    train_source = create_image_mb_source(train_data, train=True, total_number_of_samples=num_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, train=False, total_number_of_samples=C.io.FULL_DATA_SWEEP)
    validation_source = create_image_mb_source(validation_data, train=False,
                                               total_number_of_samples=C.io.FULL_DATA_SWEEP)

    train_model(network, trainer, train_source, validation_source, test_source,
                minibatch_size, epoch_size, restore, profiling)

def vgg19_train_and_eval(train_data, test_data, num_quantization_bits=32, minibatch_size=128, epoch_size=1281167,
                         max_epochs=80, restore=True, log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False,
                         testing=False):
    _cntk_py.set_computation_network_trace_level(0)

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_vgg19()
    trainer = create_trainer(network, epoch_size, num_quantization_bits, progress_printer)
    train_source = create_image_mb_source(train_data, True, total_number_of_samples=max_epochs * epoch_size)

    if testing:
        # reduce number of samples for validation when testing
        num_of_validation_samples = max_epochs * epoch_size * 10
    else:
        num_of_validation_samples = FULL_DATA_SWEEP

    test_source = create_image_mb_source(test_data, False, total_number_of_samples=num_of_validation_samples)
    train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore)

def resnet_cifar10(train_data, test_data, mean_data, network_name, epoch_size, num_quantization_bits=32,
                   block_size=3200, warm_up=0, max_epochs=5, log_to_file=None, num_mbs_per_log=None,
                   gen_heartbeat=False, scale_up=False, profiling=False):
    set_computation_network_trace_level(0)

    # NOTE: scaling up minibatch_size increases sample throughput. On an 8-GPU machine,
    # ResNet110 samples-per-second is ~7x that of a single GPU, compared to ~3x without
    # scaling up. However, a bigger minibatch size over the same number of samples means
    # fewer updates, and thus leads to higher training error. This is a trade-off between
    # speed and accuracy.
    minibatch_size = 128 * (Communicator.num_workers() if scale_up else 1)

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_resnet_network(network_name)
    trainer = create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up)
    train_source = create_image_mb_source(train_data, mean_data, train=True,
                                          total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, train=False,
                                         total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
    train_and_test(network, trainer, train_source, test_source, progress_printer,
                   minibatch_size, epoch_size, profiling)

def run_cifar_convnet_distributed():
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    train_data = os.path.join(base_path, 'train_map.txt')
    mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')
    test_data = os.path.join(base_path, 'test_map.txt')

    num_quantization_bits = 32
    return convnet_cifar10_dataaug(train_data, test_data, mean_data, num_quantization_bits,
                                   epoch_size=512, max_epochs=2)

def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_epochs=80):
    _cntk_py.set_computation_network_trace_level(1)

    # Input variables denoting the features and label data
    input_var = cntk.ops.input_variable((num_channels, image_height, image_width))
    label_var = cntk.ops.input_variable((num_classes))

    # apply model to input
    scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)

    with cntk.layers.default_options(activation=cntk.ops.relu, pad=True):
        z = cntk.models.Sequential([
            cntk.models.For(range(2), lambda: [
                cntk.layers.Convolution2D((3, 3), 64),
                cntk.layers.Convolution2D((3, 3), 64),
                LocalResponseNormalization(1.0, 4, 0.001, 0.75),
                cntk.layers.MaxPooling((3, 3), (2, 2))
            ]),
            cntk.models.For(range(2), lambda i: [
                cntk.layers.Dense([256, 128][i]),
                cntk.layers.Dropout(0.5)
            ]),
            cntk.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cntk.ops.cross_entropy_with_softmax(z, label_var)
    pe = cntk.ops.classification_error(z, label_var)

    # training config
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
    lr_schedule = cntk.learning_rate_schedule(lr_per_sample, unit=cntk.learner.UnitType.sample, epoch_size=epoch_size)
    mm_time_constant = [0]*20 + [600]*20 + [1200]
    mm_schedule = cntk.learner.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                                        unit_gain=True, l2_regularization_weight=l2_reg_weight)
    trainer = cntk.Trainer(z, (ce, pe), learner)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    cntk.utils.log_number_of_parameters(z)
    print()
    progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)

    # perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count),
                                               input_map=input_map)          # fetch minibatch
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        progress_printer.epoch_summary(with_metric=True)
        z.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch the next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # Evaluate the model on the minibatch.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    return metric_numer / metric_denom

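# The model above calls a LocalResponseNormalization helper that is not defined
# in this snippet. Below is a hedged sketch of such a block in the same
# cntk.ops namespace, following the cross-channel LRN formulation
# x / (k + alpha/(2n+1) * sum_{neighboring channels}(x^2))^beta. Treat it as
# illustrative, not as the original example's exact implementation.
def LocalResponseNormalization(k, n, alpha, beta):
    def lrn(x):
        x2 = cntk.ops.square(x)
        # insert a fake singleton depth axis so the channel dimension can be convolved over
        x2s = cntk.ops.reshape(x2, (1, cntk.InferredDimension), 0, 1)
        # a constant box filter of width 2n+1 along the channel axis computes the
        # sliding sum of squares over neighboring channels
        W = cntk.ops.constant(alpha / (2 * n + 1), (1, 2 * n + 1, 1, 1))
        y = cntk.ops.convolution(W, x2s)
        # remove the fake singleton axis again
        b = cntk.ops.reshape(y, cntk.InferredDimension, 0, 2)
        den = cntk.ops.exp(beta * cntk.ops.log(k + b))
        return cntk.ops.element_divide(x, den)
    return lrn
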
def train_and_evaluate(create_train_reader, test_reader, network_name, max_epochs, create_dist_learner, scale_up=False):
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    epoch_size = 50000  # for now we manually specify epoch size

    # NOTE: scaling up minibatch_size increases sample throughput. On an 8-GPU machine,
    # ResNet110 samples-per-second is ~7x that of a single GPU, compared to ~3x without
    # scaling up. However, a bigger minibatch size over the same number of samples means
    # fewer updates, which leads to higher training error: a speed/accuracy trade-off.
    minibatch_size = 128 * (distributed.Communicator.num_workers() if scale_up else 1)

    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # trainer object
    learner = create_dist_learner(momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                                               l2_regularization_weight=l2_reg_weight))
    trainer = Trainer(z, ce, pe, learner)

    total_number_of_samples = max_epochs * epoch_size
    train_reader = create_train_reader(total_number_of_samples)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: train_reader.streams.features,
        label_var: train_reader.streams.labels
    }

    log_number_of_parameters(z) ; print()
    progress_printer = ProgressPrinter(tag='Training')

    # perform model training
    current_epoch = 0
    updated = True
    while updated:
        data = train_reader.next_minibatch(minibatch_size, input_map=input_map)  # fetch minibatch
        updated = trainer.train_minibatch(data)                                  # update model with it
        progress_printer.update_with_trainer(trainer, with_metric=True)          # log progress
        epoch_index = int(trainer.total_number_of_samples_seen/epoch_size)
        if current_epoch != epoch_index:  # new epoch reached
            progress_printer.epoch_summary(with_metric=True)
            current_epoch = epoch_index
            trainer.save_checkpoint(os.path.join(model_path, network_name + "_{}.dnn".format(current_epoch)))

    # Evaluation parameters
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    while True:
        data = test_reader.next_minibatch(minibatch_size, input_map=input_map)
        if not data:
            break
        local_mb_samples = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * local_mb_samples
        metric_denom += local_mb_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
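A note on the momentum_time_constant = -minibatch_size/np.log(0.9) line: CNTK's time constant T is the number of samples over which the momentum contribution decays by a factor of 1/e, and it relates to a per-minibatch momentum m by m = exp(-minibatch_size/T). A quick check of that conversion, using the single-worker values from the code above:

import numpy as np

minibatch_size = 128
momentum_per_mb = 0.9
T = -minibatch_size / np.log(momentum_per_mb)  # about 1214.8 samples
# the time constant reproduces the per-minibatch momentum exactly
assert np.isclose(np.exp(-minibatch_size / T), momentum_per_mb)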
def convnet_cifar10_dataaug(create_train_reader, test_reader, create_dist_learner, max_epochs=80,
                            log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False):
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = cntk.ops.input_variable((num_channels, image_height, image_width))
    label_var = cntk.ops.input_variable((num_classes))

    # apply model to input
    scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)

    with cntk.layers.default_options(activation=cntk.ops.relu, pad=True):
        z = cntk.models.Sequential([
            cntk.models.LayerStack(2, lambda: [
                cntk.layers.Convolution((3,3), 64),
                cntk.layers.Convolution((3,3), 64),
                cntk.layers.MaxPooling((3,3), (2,2))
            ]),
            cntk.models.LayerStack(2, lambda i: [
                cntk.layers.Dense([256,128][i]),
                cntk.layers.Dropout(0.5)
            ]),
            cntk.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cntk.ops.cross_entropy_with_softmax(z, label_var)
    pe = cntk.ops.classification_error(z, label_var)

    # training config
    epoch_size = 50000  # for now we manually specify epoch size
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
    lr_schedule = cntk.learning_rate_schedule(lr_per_sample, unit=cntk.learner.UnitType.sample, epoch_size=epoch_size)
    mm_time_constant = [0]*20 + [600]*20 + [1200]
    mm_schedule = cntk.learner.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = create_dist_learner(
        cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                                  l2_regularization_weight=l2_reg_weight))
    trainer = cntk.Trainer(z, ce, pe, learner)

    total_number_of_samples = max_epochs * epoch_size
    train_reader = create_train_reader(total_number_of_samples)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: train_reader.streams.features,
        label_var: train_reader.streams.labels
    }

    cntk.utils.log_number_of_parameters(z) ; print()
    progress_printer = cntk.utils.ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        distributed_learner=learner,
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    # perform model training
    updated = True
    epoch = 0
    while updated:
        data = train_reader.next_minibatch(minibatch_size, input_map=input_map)  # fetch minibatch
        updated = trainer.train_minibatch(data)                                  # update model with it
        progress_printer.update_with_trainer(trainer, with_metric=True)          # log progress
        current_epoch = int(trainer.total_number_of_samples_seen/epoch_size)
        if epoch != current_epoch:
            progress_printer.epoch_summary(with_metric=True)
            epoch = current_epoch
            trainer.save_checkpoint(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    while True:
        data = test_reader.next_minibatch(minibatch_size, input_map=input_map)
        if not data:
            break
        local_mb_samples = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * local_mb_samples
        metric_denom += local_mb_samples
        minibatch_index += 1

    fin_msg = "Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)
    progress_printer.end_progress_print(fin_msg)

    print("")
    print(fin_msg)
    print("")

    return metric_numer/metric_denom
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs,
                       profiler_dir=None, model_dir=None, tensorboard_logdir=None):
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    minibatch_size = 128
    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # progress writers
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z) ; print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
        trainer.summarize_training_progress()

        # Log the mean of each parameter tensor, to confirm that the parameters do change.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch)

        if model_dir:
            z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler()  # begin to collect profiler data after the first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch the next test minibatch
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer/metric_denom
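A hypothetical driver for the function above, showing how the TensorBoard path is switched on; the data-file names and log directory are placeholders, not taken from this document:

# Sketch only: data_path and the map/mean file names are assumptions.
reader_train = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True)
reader_test  = create_reader(os.path.join(data_path, 'test_map.txt'),  os.path.join(data_path, 'CIFAR-10_mean.xml'), False)
train_and_evaluate(reader_train, reader_test, 'resnet20', epoch_size=50000, max_epochs=5,
                   tensorboard_logdir='tensorboard_logs')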
def train_and_evaluate(reader_train, reader_test, network_name):
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    epoch_size = 50000  # for now we manually specify epoch size
    minibatch_size = 128
    max_epochs = 160
    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, ce, pe, learner)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z) ; print()
    progress_printer = ProgressPrinter(tag='Training')

    # perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += data[label_var].num_samples                      # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        progress_printer.epoch_summary(with_metric=True)
        persist.save_model(z, os.path.join(model_path, network_name + "_{}.dnn".format(epoch)))

    # Evaluation parameters
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch the next test minibatch
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
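The lr_per_sample comprehension above rescales per-minibatch rates to per-sample rates, so the effective step taken per minibatch stays the same when the schedule is applied sample by sample. Checking the first value with the constants from the code above:

minibatch_size = 128
lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
assert lr_per_sample[0] == 1.0/128  # 0.0078125 per sample during the first 80 epochs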
def convnet_cifar10_dataaug(create_train_reader, test_reader, create_dist_learner, max_epochs=80,
                            log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False):
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = cntk.ops.input_variable((num_channels, image_height, image_width))
    label_var = cntk.ops.input_variable((num_classes))

    # apply model to input
    scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)

    with cntk.layers.default_options(activation=cntk.ops.relu, pad=True):
        z = cntk.models.Sequential([
            cntk.models.LayerStack(2, lambda: [
                cntk.layers.Convolution((3,3), 64),
                cntk.layers.Convolution((3,3), 64),
                cntk.layers.MaxPooling((3,3), (2,2))
            ]),
            cntk.models.LayerStack(2, lambda i: [
                cntk.layers.Dense([256,128][i]),
                cntk.layers.Dropout(0.5)
            ]),
            cntk.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cntk.ops.cross_entropy_with_softmax(z, label_var)
    pe = cntk.ops.classification_error(z, label_var)

    # training config
    epoch_size = 50000  # for now we manually specify epoch size
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
    lr_schedule = cntk.learning_rate_schedule(lr_per_sample, unit=cntk.learner.UnitType.sample, epoch_size=epoch_size)
    mm_time_constant = [0]*20 + [600]*20 + [1200]
    mm_schedule = cntk.learner.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = create_dist_learner(
        cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                                  l2_regularization_weight=l2_reg_weight))
    trainer = cntk.Trainer(z, ce, pe, learner)

    total_number_of_samples = max_epochs * epoch_size
    train_reader = create_train_reader(total_number_of_samples)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: train_reader.streams.features,
        label_var: train_reader.streams.labels
    }

    cntk.utils.log_number_of_parameters(z) ; print()
    progress_printer = cntk.utils.ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        distributed_learner=learner,
        gen_heartbeat=gen_heartbeat)

    # perform model training
    updated = True
    epoch = 0
    while updated:
        data = train_reader.next_minibatch(minibatch_size, input_map=input_map)  # fetch minibatch
        updated = trainer.train_minibatch(data)                                  # update model with it
        progress_printer.update_with_trainer(trainer, with_metric=True)          # log progress
        current_epoch = int(trainer.total_number_of_samples_seen/epoch_size)
        if epoch != current_epoch:
            progress_printer.epoch_summary(with_metric=True)
            epoch = current_epoch
            trainer.save_checkpoint(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    while True:
        data = test_reader.next_minibatch(minibatch_size, input_map=input_map)
        if not data:
            break
        local_mb_samples = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * local_mb_samples
        metric_denom += local_mb_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
while sample_count < epoch_size:
    current_minibatch = min(minibatch_size, epoch_size - sample_count)
    # Fetch the next test minibatch
    data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
    metric_numer += trainer.test_minibatch(data) * current_minibatch
    metric_denom += current_minibatch
    # Keep track of the number of samples processed so far
    sample_count += data[label_var].num_samples
    minibatch_index += 1

print("")
print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
print("")

if __name__=='__main__':
    # TODO: leave these in for now as debugging aids; remove for beta
    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)  # TODO: remove debugging facilities once this all works
    set_fixed_random_seed(1)                # BUGBUG: has no effect at present
    #force_deterministic_algorithms()       # TODO: enable; currently leads to slightly different results

    reader_train = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True)
    reader_test  = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)

    train_and_evaluate(reader_train, reader_test, max_epochs=5)
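The create_reader helper referenced here is defined elsewhere in the example. A minimal sketch of an image reader with the usual CIFAR-10 augmentation, modeled on the shipped examples (the crop/scale settings are typical values and therefore assumptions; image_width, image_height, num_channels, and num_classes are the module-level constants used above):

import cntk.io.transforms as xforms
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs

# Sketch only: transform parameters are not taken from this document.
def create_reader(map_file, mean_file, train):
    transforms = []
    if train:  # randomized crop for training data only
        transforms += [xforms.crop(crop_type='randomside', side_ratio=0.8)]
    transforms += [
        xforms.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
        xforms.mean(mean_file)
    ]
    return MinibatchSource(ImageDeserializer(map_file, StreamDefs(
        features=StreamDef(field='image', transforms=transforms),
        labels=StreamDef(field='label', shape=num_classes))))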
def convnet_cifar10(debug_output=False):
    _cntk_py.set_computation_network_trace_level(0)

    image_height = 32
    image_width = 32
    num_channels = 3
    input_dim = image_height * image_width * num_channels
    num_output_classes = 10

    # Input variables denoting the features and label data
    input_var = cntk.ops.input_variable((num_channels, image_height, image_width), np.float32)
    label_var = cntk.ops.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    input_removemean = cntk.ops.minus(input_var, cntk.ops.constant(128))
    scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_removemean)

    with cntk.layers.default_options(activation=cntk.ops.relu, pad=True):
        z = cntk.layers.Sequential([
            cntk.layers.For(range(2), lambda: [
                cntk.layers.Convolution2D((3,3), 64),
                cntk.layers.Convolution2D((3,3), 64),
                cntk.layers.MaxPooling((3,3), (2,2))
            ]),
            cntk.layers.For(range(2), lambda i: [
                cntk.layers.Dense([256,128][i]),
                cntk.layers.Dropout(0.5)
            ]),
            cntk.layers.Dense(num_output_classes, activation=None)
        ])(scaled_input)

    ce = cntk.ops.cross_entropy_with_softmax(z, label_var)
    pe = cntk.ops.classification_error(z, label_var)

    reader_train = create_reader(os.path.join(data_path, 'Train_cntk_text.txt'), True, input_dim, num_output_classes)

    # training config
    epoch_size = 50000  # for now we manually specify epoch size
    minibatch_size = 64
    max_epochs = 30

    # Set learning parameters
    lr_per_sample = [0.0015625]*10 + [0.00046875]*10 + [0.00015625]
    lr_schedule = cntk.learning_rate_schedule(lr_per_sample, cntk.learner.UnitType.sample, epoch_size)
    mm_time_constant = [0]*20 + [-minibatch_size/np.log(0.9)]
    mm_schedule = cntk.learner.momentum_as_time_constant_schedule(mm_time_constant, epoch_size)
    l2_reg_weight = 0.002

    # Instantiate the trainer object to drive the model training
    learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                                        l2_regularization_weight=l2_reg_weight)
    progress_printer = cntk.utils.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = cntk.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    cntk.utils.log_number_of_parameters(z) ; print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
        trainer.summarize_training_progress()
        z.save(os.path.join(model_path, "ConvNet_CIFAR10_{}.dnn".format(epoch)))

    # Load test data
    reader_test = create_reader(os.path.join(data_path, 'Test_cntk_text.txt'), False, input_dim, num_output_classes)

    input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels
    }

    # Test data for trained model
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch the next test minibatch
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
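Similarly, the CTF-text reader assumed by convnet_cifar10 can be sketched as follows; the 'features'/'labels' field names follow the conventional CIFAR-10 CNTK-text files, which is an assumption, not something stated in this document:

from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs

# Sketch only: field names are the conventional ones for the CIFAR-10 CNTK-text files.
def create_reader(path, is_training, input_dim, label_dim):
    return MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='features', shape=input_dim),
        labels=StreamDef(field='labels', shape=label_dim))),
        randomize=is_training)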
def convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_epochs=80):
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # apply model to input
    scaled_input = element_times(constant(0.00390625), input_var)

    with default_options(activation=relu, pad=True):
        z = Sequential([
            LayerStack(2, lambda: [
                Convolution((3,3), 64),
                Convolution((3,3), 64),
                MaxPooling((3,3), (2,2))
            ]),
            LayerStack(2, lambda i: [
                Dense([256,128][i]),
                Dropout(0.5)
            ]),
            Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 50000  # for now we manually specify epoch size
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size)
    mm_time_constant = [0]*20 + [600]*20 + [1200]
    mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, ce, pe, learner, distributed_trainer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z) ; print()
    progress_printer = ProgressPrinter(tag='Training')

    # perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        progress_printer.epoch_summary(with_metric=True)
        if distributed_trainer.communicator().current_worker().global_rank == 0:
            persist.save_model(z, os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch the next test minibatch
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Count test samples via the label stream; previous_minibatch_sample_count
        # reflects the last *training* minibatch, not test_minibatch.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get a detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height = train_reader.height
    image_width = train_reader.width
    num_channels = train_reader.channel_count
    sequence_length = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = input_variable(num_output_classes, np.float32)

    # Instantiate a simple 3D convolution network inspired by the VGG network
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with default_options(activation=relu):
        z = Sequential([
            Convolution3D((3,3,3), 64, pad=True),
            MaxPooling((1,2,2), (1,2,2)),
            For(range(3), lambda i: [
                Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                MaxPooling((2,2,2), (2,2,2))
            ]),
            For(range(2), lambda: [
                Dense(1024),
                Dropout(0.5)
            ]),
            Dense(num_output_classes, activation=None)
        ])(input_var)

    # loss and classification error
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 1322  # for now we manually specify epoch size
    minibatch_size = 4

    # Set learning parameters
    lr_per_sample = [0.01]*10 + [0.001]*10 + [0.0001]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule = momentum_as_time_constant_schedule([momentum_time_constant], epoch_size=epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z) ; print()

    # Get minibatches of videos to train with and perform model training
    for epoch in range(max_epochs):  # loop over epochs
        train_reader.reset()
        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(minibatch_size)
            trainer.train_minibatch({input_var: videos, label_var: labels})
        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size = 332
    minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    test_reader.reset()
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(minibatch_size)
        metric_numer += trainer.test_minibatch({input_var: videos, label_var: labels}) * current_minibatch
        metric_denom += current_minibatch  # keep track of the number of samples processed so far
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
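A hypothetical driver for conv3d_ucf11; VideoReader stands for any class that exposes the height/width/channel_count/sequence_length/label_count attributes and the reset()/has_more()/next_minibatch() protocol the function uses. The class name, data_path, and map-file names are assumptions, not taken from this document:

if __name__ == '__main__':
    num_output_classes = 11  # UCF11 has 11 action categories
    # VideoReader is hypothetical; it must provide the attributes and methods listed above.
    train_reader = VideoReader(os.path.join(data_path, 'train_map.csv'), num_output_classes, True)
    test_reader  = VideoReader(os.path.join(data_path, 'test_map.csv'),  num_output_classes, False)
    conv3d_ucf11(train_reader, test_reader)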