def test_cifar_resnet_distributed_block_momentum(device_id):
    """Run the distributed ResNet CIFAR-10 script with the ``-b 3200`` option.

    The script arguments are ``-e 2 -datadir <CIFAR-10 dir> -b 3200 -es 512
    -device 0``. The trailing positional values (0.89, False, 5) are handed
    to ``mpiexec_test`` unchanged; their meaning is defined by that helper.
    """
    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-e", "2",
        "-datadir", data_dir,
        "-b", "3200",
        "-es", "512",
        "-device", "0",
    ]
    mpiexec_test(device_id, script_under_test, script_args, 0.89, False, 5)
def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    """Smoke-test distributed ``resnet20`` training on CIFAR-10.

    Skipped unless ``device_id`` resolves to a GPU device and ``is_1bit_sgd``
    is truthy. Training runs with ``epoch_size=512`` and ``max_epochs=2``; the
    resulting test error is deliberately not asserted (see the note below).

    The working directory is switched to the CIFAR-10 data directory while the
    test runs and is restored afterwards, so the change does not leak into
    tests executed later in the same process.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)
    try:
        from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
        set_computation_network_trace_level(1)
        set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
        #force_deterministic_algorithms()
        # TODO: do the above; they lead to slightly different results, so not doing it for now

        train_data = os.path.join(base_path, 'train_map.txt')
        test_data = os.path.join(base_path, 'test_map.txt')
        mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')

        test_error = resnet_cifar10(train_data, test_data, mean_data, 'resnet20', epoch_size=512, max_epochs=2)

        # We are removing tolerance in error because running small epoch size has huge variance in accuracy. Will add
        # tolerance back once convolution operator is deterministic.

        # expected_test_error = 0.282
        # assert np.allclose(test_error, expected_test_error,
        #                    atol=TOLERANCE_ABSOLUTE)

        # Finalize only on the success path, exactly as before.
        distributed.Communicator.finalize()
    finally:
        # Undo the chdir even when training raises.
        os.chdir(current_path)
def train_cifar_resnet_for_eval(test_device, output_dir):
    """Prepare CIFAR-10 data and, unless ``test_device`` is ``'cpu'``, train ``resnet20``.

    ``output_dir`` is created if it is not already a directory and is used as
    the ``model_dir`` for training. The working directory is changed to the
    CIFAR-10 data directory and left there.

    Returns the CIFAR-10 data directory path.
    """
    model_dir = os.path.abspath(output_dir)
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    if test_device == 'cpu':
        print('train cifar_resnet only on GPU device. Use pre-trained models.')
        return base_path

    print('training cifar_resnet on GPU device...')
    mean_file = os.path.join(base_path, 'CIFAR-10_mean.xml')
    train_reader = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'train_map.txt'), mean_file, True)
    test_reader = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'test_map.txt'), mean_file, False)
    TrainResNet_CIFAR10.train_and_evaluate(
        train_reader, test_reader, 'resnet20',
        epoch_size=512, max_epochs=1, profiler_dir=None, model_dir=model_dir)
    return base_path
def test_bn_inception_cifar(device_id):
    """Train and evaluate BN-Inception on CIFAR-10 and compare the error with 0.88.

    Skipped when ``device_id`` does not resolve to a GPU device. The working
    directory is switched to the CIFAR-10 data directory for training and put
    back afterwards.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    original_cwd = os.getcwd()
    data_root = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(data_root)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    mean_file, train_map, test_map = [
        os.path.join(data_root, file_name)
        for file_name in ('CIFAR-10_mean.xml', 'train_map.txt', 'test_map.txt')
    ]

    try:
        observed_error = bn_inception_train_and_eval(
            train_map, test_map, mean_file,
            minibatch_size=16, epoch_size=500, max_epochs=8,
            restore=False, testing_parameters=(500, 16))
    finally:
        os.chdir(original_cwd)

    assert np.allclose(observed_error, 0.88, atol=TOLERANCE_ABSOLUTE)
def test_cifar_resnet_distributed_1bitsgd(device_id):
    """Run the distributed ResNet CIFAR-10 script with the ``-q 1`` option.

    The script arguments are ``-e 2 -datadir <CIFAR-10 dir> -q 1 -es 512
    -device 0``. The trailing positional values (0.86, False, 3) are handed
    to ``mpiexec_test`` unchanged; their meaning is defined by that helper.
    """
    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-e", "2",
        "-datadir", data_dir,
        "-q", "1",
        "-es", "512",
        "-device", "0",
    ]
    mpiexec_test(device_id, script_under_test, script_args, 0.86, False, 3)
def test_cifar_convnet_distributed(device_id):
    """Run the distributed ConvNet CIFAR-10 script and check its TensorBoard output.

    A log directory next to this test file is removed if it already exists,
    then passed to the script via ``-tensorboard_logdir``. After the run the
    directory must contain exactly one entry, whose name starts with
    ``events.out.tfevents``.
    """
    # Create a path to TensorBoard log directory and make sure it does not exist.
    test_dir = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(test_dir, 'ConvNet_CIFAR10_DataAug_Distributed_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-n", "2",
        "-m", "64",
        "-e", "3200",
        "-datadir", data_dir,
        "-tensorboard_logdir", tb_logdir,
        "-q", "32",
        "-r",
        "-device", str(device_id),
    ]
    # False since different workers may have different #cores
    mpiexec_test(device_id, script_under_test, mpiexec_params, script_args, 0.75, False,
                 per_minibatch_tolerance=1e-2)

    # Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
    event_files = os.listdir(tb_logdir)
    for entry in event_files:
        assert entry.startswith("events.out.tfevents")
    assert len(event_files) == 1
def test_cifar_convnet_error(device_id):
    """Smoke-test ``convnetlrn_cifar10_dataaug`` on CIFAR-10 (GPU only).

    Runs one epoch with ``epoch_size=256``. The returned value is not
    asserted; the test only verifies that training completes.

    The working directory is switched to the CIFAR-10 data directory while the
    test runs and is restored afterwards, so the change does not leak into
    tests executed later in the same process.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)
    try:
        from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
        set_computation_network_trace_level(1)
        set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
        #force_deterministic_algorithms()
        # TODO: do the above; they lead to slightly different results, so not doing it for now

        reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                     os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
        reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                    os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

        convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=256, max_epochs=1)
    finally:
        # Undo the chdir even when training raises.
        os.chdir(current_path)
def test_bn_inception_cifar(device_id):
    """Train and evaluate BN-Inception on CIFAR-10 and compare the error with 0.88.

    Skipped when ``device_id`` does not resolve to a GPU device. The working
    directory is switched to the CIFAR-10 data directory for training and put
    back afterwards.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    original_cwd = os.getcwd()
    data_root = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(data_root)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    mean_file, train_map, test_map = [
        os.path.join(data_root, file_name)
        for file_name in ('CIFAR-10_mean.xml', 'train_map.txt', 'test_map.txt')
    ]

    try:
        observed_error = bn_inception_train_and_eval(
            train_map, test_map, mean_file,
            minibatch_size=16, epoch_size=500, max_epochs=8,
            restore=False, testing_parameters=(500, 16))
    finally:
        os.chdir(original_cwd)

    assert np.allclose(observed_error, 0.88, atol=TOLERANCE_ABSOLUTE)
def test_cifar_convnet_error(device_id):
    """Smoke-test ConvNet training on CIFAR-10 with train and test readers (GPU only).

    Builds a 10-class model with ``create_convnet_cifar10_model`` and trains it
    with ``epoch_size=128`` and ``max_epochs=1``. The value returned by
    ``train_model`` is not asserted.

    The working directory is switched to the CIFAR-10 data directory while the
    test runs and is restored afterwards, so the change does not leak into
    tests executed later in the same process.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)
    try:
        from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
        set_fixed_random_seed(1)
        force_deterministic_algorithms()

        reader_train1 = create_reader(os.path.join(base_path, 'train_map.txt'),
                                      os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
        reader_test1 = create_reader(os.path.join(base_path, 'test_map.txt'),
                                     os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
        model1 = create_convnet_cifar10_model(num_classes=10)
        train_model(reader_train1, reader_test1, model1, epoch_size=128, max_epochs=1)
    finally:
        # Undo the chdir even when training raises.
        os.chdir(current_path)
def test_cifar_convnet_error(device_id):
    """Train the CIFAR-10 ConvNet and compare loss/metric with recorded values (GPU only).

    Trains with ``epoch_size=128`` and ``max_epochs=5`` and asserts that
    ``(train_loss, metric)`` is close to ``(2.2963, 0.9062)`` within
    ``TOLERANCE_ABSOLUTE``.

    The working directory is switched to the CIFAR-10 data directory while
    training runs and is restored afterwards, so the change does not leak into
    tests executed later in the same process.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)
    try:
        from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
        set_fixed_random_seed(1)
        force_deterministic_algorithms()

        reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                     os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
        model = create_convnet_cifar10_model(num_classes=10)
        model.update_signature((num_channels, image_height, image_width))
        criterion = create_criterion_function(model, normalize=lambda x: x / 256)
        train_loss, metric = train_model(reader_train, model, criterion, epoch_size=128, max_epochs=5)
    finally:
        # Undo the chdir even when training raises.
        os.chdir(current_path)

    expected_loss_metric = (2.2963, 0.9062)
    assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
def test_cifar_convnet_distributed_block_momentum(device_id):
    """Run the distributed ConvNet CIFAR-10 script with the ``-b 1600`` option.

    The script arguments are ``-n 2 -m 64 -e 3200 -datadir <CIFAR-10 dir>
    -b 1600 -r -device <device_id>``. The trailing positional values
    (0.78, False, 10) are handed to ``mpiexec_test`` unchanged; their meaning
    is defined by that helper.
    """
    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-n", "2",
        "-m", "64",
        "-e", "3200",
        "-datadir", data_dir,
        "-b", "1600",
        "-r",
        "-device", str(device_id),
    ]
    mpiexec_test(device_id, script_under_test, mpiexec_params, script_args, 0.78, False, 10)
def test_cifar_resnet_distributed_block_momentum(device_id):
    """Run the distributed ResNet CIFAR-10 script with the ``-b 3200`` option.

    The script arguments are ``-e 2 -datadir <CIFAR-10 dir> -b 3200 -es 512
    -device <device_id>``. The trailing positional values (0.89, False, 5)
    are handed to ``mpiexec_test`` unchanged; their meaning is defined by
    that helper.
    """
    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-e", "2",
        "-datadir", data_dir,
        "-b", "3200",
        "-es", "512",
        "-device", str(device_id),
    ]
    mpiexec_test(device_id, script_under_test, mpiexec_params, script_args, 0.89, False, 5)
def test_cifar_resnet_distributed_1bitsgd(device_id):
    """Run the distributed ResNet CIFAR-10 script with the ``-q 1`` option.

    The script arguments are ``-e 2 -datadir <CIFAR-10 dir> -q 1 -es 512
    -device <device_id>``. The trailing positional values (0.86, False, 3)
    are handed to ``mpiexec_test`` unchanged; their meaning is defined by
    that helper.
    """
    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-e", "2",
        "-datadir", data_dir,
        "-q", "1",
        "-es", "512",
        "-device", str(device_id),
    ]
    mpiexec_test(device_id, script_under_test, mpiexec_params, script_args, 0.86, False, 3)
def test_bn_inception_cifar_distributed(device_id):
    """Run the distributed BN-Inception CIFAR-10 script through ``mpiexec_test``.

    The script arguments are ``-n 8 -datadir <CIFAR-10 dir> -q 32 -e 500
    -m 16 -r -device <device_id>``. The trailing positional values
    (0.88, True) are handed to ``mpiexec_test`` unchanged; their meaning is
    defined by that helper.
    """
    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-n", "8",
        "-datadir", data_dir,
        "-q", "32",
        "-e", "500",
        "-m", "16",
        "-r",
        "-device", str(device_id),
    ]
    mpiexec_test(device_id, script_under_test, mpiexec_params, script_args, 0.88, True)
def test_cifar_convnet_distributed_1bitsgd(device_id):
    """Run the distributed ConvNet CIFAR-10 script with the ``-q 1`` option.

    The script arguments are ``-n 2 -m 64 -e 3200 -datadir <CIFAR-10 dir>
    -q 1 -r -device <device_id>``. ``mpiexec_test`` additionally receives
    0.75, False and ``per_minibatch_tolerance=1e-2``; their meaning is defined
    by that helper.
    """
    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-n", "2",
        "-m", "64",
        "-e", "3200",
        "-datadir", data_dir,
        "-q", "1",
        "-r",
        "-device", str(device_id),
    ]
    mpiexec_test(device_id, script_under_test, mpiexec_params, script_args, 0.75, False,
                 per_minibatch_tolerance=1e-2)
def test_cifar_convnet_distributed_block_momentum(device_id):
    """Run the distributed ConvNet CIFAR-10 script with the ``-b 1600`` option.

    The script arguments are ``-n 2 -m 64 -e 3200 -datadir <CIFAR-10 dir>
    -b 1600 -r -device 0``. The trailing positional values (0.78, False, 10)
    are handed to ``mpiexec_test`` unchanged; their meaning is defined by
    that helper.
    """
    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-n", "2",
        "-m", "64",
        "-e", "3200",
        "-datadir", data_dir,
        "-b", "1600",
        "-r",
        "-device", "0",
    ]
    mpiexec_test(device_id, script_under_test, script_args, 0.78, False, 10)
def test_bn_inception_cifar_distributed(device_id):
    """Run the distributed BN-Inception CIFAR-10 script through ``mpiexec_test``.

    The script arguments are ``-n 8 -datadir <CIFAR-10 dir> -q 32 -e 500
    -m 16 -r -device <device_id>``. The trailing positional values
    (0.88, True) are handed to ``mpiexec_test`` unchanged; their meaning is
    defined by that helper.
    """
    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-n", "8",
        "-datadir", data_dir,
        "-q", "32",
        "-e", "500",
        "-m", "16",
        "-r",
        "-device", str(device_id),
    ]
    mpiexec_test(device_id, script_under_test, mpiexec_params, script_args, 0.88, True)
def test_cifar_convnet_distributed_1bitsgd(device_id):
    """Run the distributed ConvNet CIFAR-10 script with the ``-q 1`` option.

    The script arguments are ``-n 2 -m 64 -e 3200 -datadir <CIFAR-10 dir>
    -q 1 -r -device 0``. The trailing positional values (0.75, True) are
    handed to ``mpiexec_test`` unchanged; their meaning is defined by that
    helper.
    """
    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-n", "2",
        "-m", "64",
        "-e", "3200",
        "-datadir", data_dir,
        "-q", "1",
        "-r",
        "-device", "0",
    ]
    mpiexec_test(device_id, script_under_test, script_args, 0.75, True)
def test_cifar_resnet_error(device_id):
    """Smoke-test ``resnet20`` training on CIFAR-10 and check its TensorBoard output (GPU only).

    Training runs with ``epoch_size=512`` and ``max_epochs=1``; the test error
    is deliberately not asserted (see the note below). Afterwards the
    TensorBoard log directory must contain exactly one entry, whose name starts
    with ``events.out.tfevents``.

    The working directory is switched to the CIFAR-10 data directory while
    training runs and is restored afterwards, so the change does not leak into
    tests executed later in the same process.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)
    try:
        from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
        set_computation_network_trace_level(1)
        set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
        #force_deterministic_algorithms()
        # TODO: do the above; they lead to slightly different results, so not doing it for now

        reader_train = create_image_mb_source(os.path.join(base_path, 'train_map.txt'),
                                              os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                              True, total_number_of_samples=1 * 50000)
        reader_test = create_image_mb_source(os.path.join(base_path, 'test_map.txt'),
                                             os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                             False, total_number_of_samples=cntk.io.FULL_DATA_SWEEP)

        # Create a path to TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'TrainResNet_CIFAR10_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        test_error = train_and_evaluate(reader_train, reader_test, 'resnet20', epoch_size=512, max_epochs=1,
                                        tensorboard_logdir=tb_logdir)
    finally:
        # Undo the chdir even when training raises.
        os.chdir(current_path)

    # We are removing tolerance in error because running small epoch size has huge variance in accuracy. Will add
    # tolerance back once convolution operator is deterministic.

    # expected_test_error = 0.282
    # assert np.allclose(test_error, expected_test_error,
    #                    atol=TOLERANCE_ABSOLUTE)

    # tb_logdir is an absolute path, so the check is independent of the cwd.
    tb_entries = os.listdir(tb_logdir)
    for tb_entry in tb_entries:
        assert tb_entry.startswith("events.out.tfevents")
    assert len(tb_entries) == 1
def test_binary_convnet_error(device_id):
    """Train the binary-convolution CIFAR-10 model, then evaluate a reloaded copy on CPU (GPU only).

    Steps, as performed below:
      1. Train with ``epoch_size=8192`` and ``max_epochs=5`` and compare
         ``(train_loss, metric)`` with recorded values.
      2. Save the model to ``model.cmf`` and load it on the CPU device.
      3. Clone it with native binary convolutions, evaluate on 200 test
         samples and compare ``(test_loss, metric)`` with recorded values.

    The working directory is switched to the CIFAR-10 data directory while the
    test runs and is restored afterwards, so the change does not leak into
    tests executed later in the same process.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)
    try:
        from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
        set_fixed_random_seed(1)
        force_deterministic_algorithms()

        reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                     os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
        model = create_binary_convolution_model()
        z, criterion = get_z_and_criterion(model)
        train_loss, metric = train_model(reader_train, z, criterion, epoch_size=8192, max_epochs=5)

        expected_loss_metric = (2.677057718858123, 0.6043701171875)
        assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)

        # save and load (as an illustration)
        model_path = "model.cmf"
        model.save(model_path)
        eval_device = C.cpu()
        model = Function.load(model_path, device=eval_device)

        # test
        model_with_native_binary_convolutions = clone_with_native_binary_convolutions(model)
        _, criterion = get_z_and_criterion(model_with_native_binary_convolutions)

        reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                    os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
        test_loss, metric = evaluate(reader_test, criterion, device=eval_device,
                                     minibatch_size=1, max_samples=200)

        expected_loss_metric = (0.0, 0.695)
        assert np.allclose((test_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
    finally:
        # Undo the chdir even when training or an assertion fails.
        os.chdir(current_path)
def unzip_data(output_dir):
    """Extract the first six CIFAR-10 training images from ``data.zip`` into ``output_dir``.

    ``output_dir`` is created if it is not already a directory. The working
    directory is changed to the CIFAR-10 data directory and left there.
    Members ``data/train/00000.png`` .. ``data/train/00005.png`` are extracted.
    """
    target_dir = os.path.abspath(output_dir)
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    # unzip images for eval
    archive_path = os.path.join(base_path, 'cifar-10-batches-py', 'data.zip')
    with zipfile.ZipFile(archive_path) as archive:
        for image_index in range(6):
            member_name = 'data/train/%05d.png' % image_index
            archive.extract(member_name, target_dir)
def unzip_data(output_dir):
    """Extract the first six CIFAR-10 training images from ``data.zip`` into ``output_dir``.

    ``output_dir`` is created if it is not already a directory. The working
    directory is changed to the CIFAR-10 data directory and left there.
    Members ``data/train/00000.png`` .. ``data/train/00005.png`` are extracted.
    """
    target_dir = os.path.abspath(output_dir)
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    # unzip images for eval
    archive_path = os.path.join(base_path, 'cifar-10-batches-py', 'data.zip')
    with zipfile.ZipFile(archive_path) as archive:
        for image_index in range(6):
            member_name = 'data/train/%05d.png' % image_index
            archive.extract(member_name, target_dir)
def test_cifar_convnet_error(device_id):
    """Smoke-test ``convnet_cifar10_dataaug`` on CIFAR-10 (GPU only).

    Runs one epoch with ``epoch_size=256``. The returned value is not
    asserted; the test only verifies that training completes.

    The working directory is switched to the CIFAR-10 data directory while the
    test runs and is restored afterwards, so the change does not leak into
    tests executed later in the same process.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)
    try:
        from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
        set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
        #force_deterministic_algorithms()
        # TODO: do the above; they lead to slightly different results, so not doing it for now

        reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                     os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
        reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                    os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

        convnet_cifar10_dataaug(reader_train, reader_test, epoch_size=256, max_epochs=1)
    finally:
        # Undo the chdir even when training raises.
        os.chdir(current_path)
def train_cifar_resnet_for_eval(test_device, output_dir):
    """Prepare CIFAR-10 data and, unless ``test_device`` is ``'cpu'``, train ``resnet20``.

    ``output_dir`` is created if it is not already a directory and is used as
    the ``model_dir`` for training. The working directory is changed to the
    CIFAR-10 data directory and left there.

    Returns the CIFAR-10 data directory path.
    """
    model_dir = os.path.abspath(output_dir)
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    if test_device == 'cpu':
        print('train cifar_resnet only on GPU device. Use pre-trained models.')
        return base_path

    print('training cifar_resnet on GPU device...')
    mean_file = os.path.join(base_path, 'CIFAR-10_mean.xml')
    train_reader = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'train_map.txt'), mean_file, True)
    test_reader = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'test_map.txt'), mean_file, False)
    TrainResNet_CIFAR10.train_and_evaluate(
        train_reader, test_reader, 'resnet20',
        epoch_size=512, max_epochs=1, profiler_dir=None, model_dir=model_dir)
    return base_path
def test_binary_convnet_error(device_id):
    """Train the binary-convolution CIFAR-10 model, then evaluate a reloaded copy on CPU (GPU only).

    Skipped when ``native_convolve_function_registered`` is falsy or when
    ``device_id`` does not resolve to a GPU device.

    Steps, as performed below:
      1. Train with ``epoch_size=8192`` and ``max_epochs=5`` and compare
         ``(train_loss, metric)`` with recorded values.
      2. Save the model to ``model.cmf`` and load it on the CPU device.
      3. Clone it with native binary convolutions, evaluate on 200 test
         samples and compare ``(test_loss, metric)`` with recorded values.

    The working directory is switched to the CIFAR-10 data directory while the
    test runs and is restored afterwards, so the change does not leak into
    tests executed later in the same process.
    """
    if not native_convolve_function_registered:
        # The directory name in the message was misspelled ("Extnsibiliy").
        pytest.skip("Could not find {0} library. "
                    "Please check if HALIDE_PATH is configured properly "
                    "and try building {1} again"
                    .format('Cntk.BinaryConvolution-' + C.__version__.rstrip('+'),
                            'Extensibility\\BinaryConvolution'))

    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)
    try:
        from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
        set_fixed_random_seed(1)
        force_deterministic_algorithms()

        reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                     os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
        model = create_binary_convolution_model()
        z, criterion = get_z_and_criterion(model)
        train_loss, metric = train_model(reader_train, z, criterion, epoch_size=8192, max_epochs=5)

        expected_loss_metric = (2.677057718858123, 0.6043701171875)
        assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)

        # save and load (as an illustration)
        model_path = "model.cmf"
        model.save(model_path)
        eval_device = C.cpu()
        model = Function.load(model_path, device=eval_device)

        # test
        model_with_native_binary_convolutions = clone_with_native_binary_convolutions(model)
        _, criterion = get_z_and_criterion(model_with_native_binary_convolutions)

        reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                    os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
        test_loss, metric = evaluate(reader_test, criterion, device=eval_device,
                                     minibatch_size=1, max_samples=200)

        expected_loss_metric = (0.0, 0.695)
        assert np.allclose((test_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
    finally:
        # Undo the chdir even when training or an assertion fails.
        os.chdir(current_path)
def test_cifar_convnet_error(device_id):
    """Train the CIFAR-10 ConvNet and compare loss/metric with recorded values (GPU only).

    Trains with ``epoch_size=128`` and ``max_epochs=5`` and asserts that
    ``(train_loss, metric)`` is close to ``(2.2963, 0.9062)`` within
    ``TOLERANCE_ABSOLUTE``.

    The working directory is switched to the CIFAR-10 data directory while
    training runs and is restored afterwards, so the change does not leak into
    tests executed later in the same process.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)
    try:
        from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
        set_fixed_random_seed(1)
        force_deterministic_algorithms()

        reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                     os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
        model = create_convnet_cifar10_model(num_classes=10)
        model.update_signature((num_channels, image_height, image_width))
        criterion = create_criterion_function(model, normalize=lambda x: x / 256)
        train_loss, metric = train_model(reader_train, model, criterion, epoch_size=128, max_epochs=5)
    finally:
        # Undo the chdir even when training raises.
        os.chdir(current_path)

    expected_loss_metric = (2.2963, 0.9062)
    assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
def test_cifar_resnet_error(device_id):
    """Smoke-test ``resnet20`` training on CIFAR-10 and check its TensorBoard output (GPU only).

    Training runs with ``epoch_size=512`` and ``max_epochs=1``; the test error
    is deliberately not asserted (see the note below). Afterwards the
    TensorBoard log directory must contain exactly one entry, whose name starts
    with ``events.out.tfevents``.

    The working directory is switched to the CIFAR-10 data directory while
    training runs and is restored afterwards, so the change does not leak into
    tests executed later in the same process.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)
    try:
        from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
        set_computation_network_trace_level(1)
        set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
        #force_deterministic_algorithms()
        # TODO: do the above; they lead to slightly different results, so not doing it for now

        reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                     os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
        reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                    os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

        # Create a path to TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'TrainResNet_CIFAR10_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        test_error = train_and_evaluate(reader_train, reader_test, 'resnet20', epoch_size=512, max_epochs=1,
                                        tensorboard_logdir=tb_logdir)
    finally:
        # Undo the chdir even when training raises.
        os.chdir(current_path)

    # We are removing tolerance in error because running small epoch size has huge variance in accuracy. Will add
    # tolerance back once convolution operator is deterministic.

    # expected_test_error = 0.282
    # assert np.allclose(test_error, expected_test_error,
    #                    atol=TOLERANCE_ABSOLUTE)

    # tb_logdir is an absolute path, so the check is independent of the cwd.
    tb_entries = os.listdir(tb_logdir)
    for tb_entry in tb_entries:
        assert tb_entry.startswith("events.out.tfevents")
    assert len(tb_entries) == 1
def test_cifar_convnet_distributed(device_id):
    """Run the distributed ConvNet CIFAR-10 script and check its TensorBoard output.

    A log directory next to this test file is removed if it already exists,
    then passed to the script via ``-tensorboard_logdir``. After the run the
    directory must contain exactly one entry, whose name starts with
    ``events.out.tfevents``.
    """
    # Create a path to TensorBoard log directory and make sure it does not exist.
    test_dir = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(test_dir, 'ConvNet_CIFAR10_DataAug_Distributed_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-n", "2",
        "-m", "64",
        "-e", "3200",
        "-datadir", data_dir,
        "-tensorboard_logdir", tb_logdir,
        "-q", "32",
        "-r",
        "-device", str(device_id),
    ]
    # False since different workers may have different #cores
    mpiexec_test(device_id, script_under_test, mpiexec_params, script_args, 0.75, False,
                 per_minibatch_tolerance=1e-2)

    # Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
    event_files = os.listdir(tb_logdir)
    for entry in event_files:
        assert entry.startswith("events.out.tfevents")
    assert len(event_files) == 1
def run_cifar_convnet_distributed():
    """Run ``convnet_cifar10_dataaug`` on the prepared CIFAR-10 data and return its result.

    Changes the working directory to the CIFAR-10 data directory (and leaves
    it there), sets the trace level and the fixed random seed, then trains
    with 32 quantization bits, ``epoch_size=512`` and ``max_epochs=2``.
    """
    data_root = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(data_root)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    train_map, mean_file, test_map = [
        os.path.join(data_root, file_name)
        for file_name in ('train_map.txt', 'CIFAR-10_mean.xml', 'test_map.txt')
    ]
    quantization_bits = 32

    return convnet_cifar10_dataaug(train_map, test_map, mean_file, quantization_bits,
                                   epoch_size=512, max_epochs=2)
def train_cifar_resnet_for_eval(test_device, output_dir):
    """Extract sample images and, unless ``test_device`` is ``'cpu'``, train ``resnet20``.

    ``output_dir`` is created if it is not already a directory. Members
    ``data/train/00000.png`` .. ``data/train/00005.png`` of ``data.zip`` are
    extracted into it, and it is also used as the ``model_dir`` for training.
    The working directory is changed to the CIFAR-10 data directory and left
    there.

    Returns the CIFAR-10 data directory path.
    """
    model_dir = os.path.abspath(output_dir)
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    # unzip images for eval
    archive_path = os.path.join(base_path, 'cifar-10-batches-py', 'data.zip')
    with zipfile.ZipFile(archive_path) as archive:
        for image_index in range(6):
            member_name = 'data/train/%05d.png' % image_index
            archive.extract(member_name, model_dir)

    if test_device == 'cpu':
        print('train cifar_resnet only on GPU device. Use pre-trained models.')
        return base_path

    print('training cifar_resnet on GPU device...')
    mean_file = os.path.join(base_path, 'CIFAR-10_mean.xml')
    train_reader = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'train_map.txt'), mean_file, True)
    test_reader = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'test_map.txt'), mean_file, False)
    TrainResNet_CIFAR10.train_and_evaluate(
        train_reader, test_reader, 'resnet20',
        epoch_size=512, max_epochs=1, profiler_dir=None, model_dir=model_dir)
    return base_path
def test_binary_convnet_error(device_id):
    """Train the binary-convolution CIFAR-10 model, then evaluate a reloaded copy on CPU (GPU only).

    Steps, as performed below:
      1. Train with ``epoch_size=8192`` and ``max_epochs=5`` and compare
         ``(train_loss, metric)`` with recorded values.
      2. Save the model to ``model.cmf`` and load it on the CPU device.
      3. Clone it with native binary convolutions, evaluate on 200 test
         samples and compare ``(test_loss, metric)`` with recorded values.

    The working directory is switched to the CIFAR-10 data directory while the
    test runs and is restored afterwards, so the change does not leak into
    tests executed later in the same process.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)
    try:
        from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
        set_fixed_random_seed(1)
        force_deterministic_algorithms()

        reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                     os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
        model = create_binary_convolution_model()
        z, criterion = get_z_and_criterion(model)
        train_loss, metric = train_model(reader_train, z, criterion, epoch_size=8192, max_epochs=5)

        expected_loss_metric = (2.677057718858123, 0.6043701171875)
        assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)

        # save and load (as an illustration)
        model_path = "model.cmf"
        model.save(model_path)
        eval_device = C.cpu()
        model = Function.load(model_path, device=eval_device)

        # test
        model_with_native_binary_convolutions = clone_with_native_binary_convolutions(model)
        _, criterion = get_z_and_criterion(model_with_native_binary_convolutions)

        reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                    os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
        test_loss, metric = evaluate(reader_test, criterion, device=eval_device,
                                     minibatch_size=1, max_samples=200)

        expected_loss_metric = (0.0, 0.695)
        assert np.allclose((test_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
    finally:
        # Undo the chdir even when training or an assertion fails.
        os.chdir(current_path)
def train_cifar_resnet_for_eval(test_device, output_dir):
    """Extract sample images and, unless ``test_device`` is ``'cpu'``, train ``resnet20``.

    ``output_dir`` is created if it is not already a directory. Members
    ``data/train/00000.png`` .. ``data/train/00005.png`` of ``data.zip`` are
    extracted into it, and it is also used as the ``model_dir`` for training.
    The working directory is changed to the CIFAR-10 data directory and left
    there.

    Returns the CIFAR-10 data directory path.
    """
    model_dir = os.path.abspath(output_dir)
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    # unzip images for eval
    archive_path = os.path.join(base_path, 'cifar-10-batches-py', 'data.zip')
    with zipfile.ZipFile(archive_path) as archive:
        for image_index in range(6):
            member_name = 'data/train/%05d.png' % image_index
            archive.extract(member_name, model_dir)

    if test_device == 'cpu':
        print('train cifar_resnet only on GPU device. Use pre-trained models.')
        return base_path

    print('training cifar_resnet on GPU device...')
    mean_file = os.path.join(base_path, 'CIFAR-10_mean.xml')
    train_reader = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'train_map.txt'), mean_file, True)
    test_reader = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'test_map.txt'), mean_file, False)
    TrainResNet_CIFAR10.train_and_evaluate(
        train_reader, test_reader, 'resnet20',
        epoch_size=512, max_epochs=1, profiler_dir=None, model_dir=model_dir)
    return base_path
def test_cifar_convnet_distributed_1bitsgd(device_id):
    """Run the distributed ConvNet CIFAR-10 script with the ``-q 1`` option.

    The script arguments are ``-n 2 -m 64 -e 3200 -datadir <CIFAR-10 dir>
    -q 1 -r -device 0``. The trailing positional values (0.75, True) are
    handed to ``mpiexec_test`` unchanged; their meaning is defined by that
    helper.
    """
    data_dir = prepare_CIFAR10_data()
    script_args = [
        "-n", "2",
        "-m", "64",
        "-e", "3200",
        "-datadir", data_dir,
        "-q", "1",
        "-r",
        "-device", "0",
    ]
    mpiexec_test(device_id, script_under_test, script_args, 0.75, True)
import numpy as np import os import sys import pytest import subprocess abs_path = os.path.dirname(os.path.abspath(__file__)) example_dir = os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "ResNet", "Python") sys.path.append(example_dir) sys.path.append(abs_path) from distributed_common import mpiexec_test from prepare_test_data import prepare_CIFAR10_data base_path = prepare_CIFAR10_data() # change dir to locate data.zip correctly os.chdir(base_path) script_under_test = os.path.join(example_dir, "TrainResNet_CIFAR10_Distributed.py") mpiexec_params = ["-n", "2"] def test_cifar_resnet_distributed(device_id): params = [ "-e", "2", "-datadir", base_path, "-q", "32", "-es", "512", "-r", "-device", str(device_id) ]
import signal import shutil import subprocess import re import pytest abs_path = os.path.dirname(os.path.abspath(__file__)) sys.path.append(abs_path) example_dir = os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "ConvNet", "Python") sys.path.append(example_dir) script_under_test = os.path.join(example_dir, "ConvNet_CIFAR10_DataAug_Distributed.py") from distributed_common import mpiexec_test, mpiexec_execute from prepare_test_data import prepare_CIFAR10_data base_path = prepare_CIFAR10_data() # change dir to locate data.zip correctly os.chdir(base_path) mpiexec_params = [ "-n", "2"] def test_cifar_convnet_distributed(device_id): # Create a path to TensorBoard log directory and make sure it does not exist. abs_path = os.path.dirname(os.path.abspath(__file__)) tb_logdir = os.path.join(abs_path, 'ConvNet_CIFAR10_DataAug_Distributed_test_log') if os.path.exists(tb_logdir): shutil.rmtree(tb_logdir) params = [ "-n", "2", "-m", "64",