def test_cifar_resnet_distributed(device_id):
    """Run the script under test through ``mpiexec_execute`` and count loss lines.

    The test passes when the captured output contains exactly four
    "Training loss of the generator at worker" lines.

    Args:
        device_id: Supplied by the caller (presumably a pytest fixture);
            it is not used inside this test body.
    """
    params = ["-datadir", base_path]
    str_out = mpiexec_execute(script_under_test, mpiexec_params, params)

    # Example of a line this pattern is meant to match:
    #   Training loss of the generator at worker: {0} is: {2.201804}, time taken is: {40} seconds
    # A raw string is required here: "\{" and "\}" are not valid str escape
    # sequences, so a plain literal triggers an invalid-escape warning on
    # modern Python while producing the very same pattern.
    results = re.findall(
        r"Training loss of the generator at worker: \{.+?\} is: \{.+?\}",
        str_out,
    )

    # Attach the captured output so a failure shows what was actually printed.
    assert len(results) == 4, str_out
def test_finalize_with_exception_no_hang():
    """Launch this file through ``mpiexec_execute`` and inspect its markers.

    The run is started with ``["-n", "2"]`` (presumably two MPI processes --
    confirm against ``mpiexec_execute``) and no script arguments. The output
    must contain exactly one "Completed with exception." match and no
    "Completed successfully." match.
    """
    captured = mpiexec_execute(__file__, ["-n", "2"], [])

    # NOTE: the trailing "." in both patterns is an unescaped regex wildcard.
    failure_markers = re.findall("Completed with exception.", captured)
    success_markers = re.findall("Completed successfully.", captured)

    assert len(failure_markers) == 1
    assert len(success_markers) == 0
def test_sample_count_with_several_distributed_learners():
    """Launch this file through ``mpiexec_execute`` and expect two successes.

    The run is started with ``["-n", "2"]`` and no script arguments. The test
    fails, after printing the captured output, if any "Completed with
    exception." match is present or if the number of "Completed
    successfully." matches is not exactly two.
    """
    captured = mpiexec_execute(__file__, ["-n", "2"], [])

    # Any exception marker at all is a failure; dump the output for diagnosis.
    if re.findall("Completed with exception.", captured):
        print(captured)
        assert False

    # Exactly two success markers are expected.
    success_markers = re.findall("Completed successfully.", captured)
    if len(success_markers) != 2:
        print(captured)
        assert False
def test_htk_lstm_truncated_distributed_gpu_with_cv(device_id):
    """Check that cross validation reports appear during truncated training.

    Intent, per the original author: full-sequence cross validation must work
    in the middle of BPTT training. The test runs the script under test via
    ``mpiexec_execute`` and expects exactly six "Finished Evaluation" lines
    with a metric in the output.

    Args:
        device_id: Device identifier, forwarded both to ``mpiexec_execute``
            and to the script through its ``-device`` argument.
    """
    script_args = [
        "-n", "2",
        "-datadir", an4_dataset_directory(),
        "-q", "1",
        "-m", "640",
        "-e", "1500",
        "-cvfreq", "1000",
        "-device", str(device_id),
    ]
    run_log = mpiexec_execute(
        script=script_under_test,
        mpiexec_params=mpiexec_params,
        params=script_args,
        device_id=device_id,
    )

    # Each match captures the metric value reported by one evaluation line.
    eval_metrics = re.findall(
        r"Finished Evaluation \[.+?\]: Minibatch\[.+?\]: metric = (.+?)%",
        run_log,
    )
    assert len(eval_metrics) == 6, run_log
def test_cifar_convnet_distributed_block_momentum(device_id):
    """Run the script under test and check the reported minibatch ranges.

    The output must contain exactly two "Minibatch[...]: loss = ...%" lines,
    each reporting the same minibatch range.

    Args:
        device_id: Device identifier, forwarded both to ``mpiexec_execute``
            and to the script through its ``-device`` argument.
    """
    script_args = [
        "-n", "1",
        "-m", "64",
        "-e", "13000",
        "-datadir", base_path,
        "-b", "1600",
        "-r",
        "-device", str(device_id),
    ]

    # Per the original author: 13000 samples / 2 workers / 64 samples per
    # minibatch = 101 minibatches, so only the 1-100 progress line is
    # expected to be printed.
    run_log = mpiexec_execute(
        script_under_test, mpiexec_params, script_args, device_id=device_id)
    reported_ranges = re.findall(r"Minibatch\[(.+?)\]: loss = .+?%", run_log)

    expected_range = ' 1- 100'
    assert len(reported_ranges) == 2
    for reported in reported_ranges:
        assert reported == expected_range
def test_cifar_convnet_distributed_block_momentum(device_id):
    """Run the script under test and check the reported minibatch ranges.

    The output must contain exactly two "Minibatch[...]: loss = ...%" lines,
    each reporting the range ``' 1- 100'``.

    Args:
        device_id: Device identifier, forwarded both to ``mpiexec_execute``
            and to the script through its ``-device`` argument.
    """
    params = [
        "-n", "1",
        "-m", "64",
        "-e", "13000",
        "-datadir", base_path,
        "-b", "1600",
        "-r",
        "-device", str(device_id),
    ]

    # Per the original author: 13000 samples / 2 workers / 64 samples per
    # minibatch = 101 minibatches, so only the 1-100 progress line is
    # expected to be printed.
    output = mpiexec_execute(
        script_under_test, mpiexec_params, params, device_id=device_id)

    # Raw string: "\[" and "\]" are not valid str escape sequences, so a plain
    # literal triggers an invalid-escape warning on modern Python. The
    # resulting pattern is unchanged.
    results = re.findall(r"Minibatch\[(.+?)\]: loss = .+?%", output)

    assert len(results) == 2
    assert results[0] == ' 1- 100'
    assert results[1] == ' 1- 100'