def test_output_subset_evaluation(device_id):
    """Evaluate only one output (op1) of a combined root function.

    The parameter feeding the other output (op2) is deliberately placed on a
    device different from the compute target, so any accidental evaluation of
    op2 would error out — proving that forward() really restricts itself to
    the requested output subset.
    """
    try:
        gpu_device = C.gpu(0)
    except ValueError:
        pytest.skip('Test only runs when GPU available')
    device = cntk_device(device_id)

    x1 = C.input_variable(shape=())
    op1 = C.constant(value=1, shape=(1), device=device) + \
          (C.constant(value=1, shape=(1), device=device) + x1)

    x2 = C.input_variable(shape=(1))
    # Deliberately locate the parameter on a different device
    # instead of the actual compute target device, so that
    # if we try to use this parameter, it results in an error
    parameter_device = gpu_device if device.type() == 0 else C.cpu()
    p = C.parameter(shape=(1), init=C.glorot_uniform(), device=parameter_device)
    op2 = (x2 - C.constant(value=10, shape=(1), device=device)) - p

    root = C.combine([op1, op2])
    _, outputs = root.forward({x1: np.asarray([1, 2, 3])}, [op1], device=device)
    assert np.array_equal(outputs[op1], np.asarray([[3], [4], [5]]))
def test_cpu_and_gpu_devices():
    """Check kind and id of the CPU device and of every enumerated GPU."""
    cpu = C.cpu()
    assert cpu.type() == C.device.DeviceKind.CPU
    assert cpu.id() == 0
    # all_devices() includes the CPU, so GPU count is one less than the total
    gpu_count = len(C.device.all_devices()) - 1
    for gpu_index in range(gpu_count):
        gpu = C.gpu(gpu_index)
        assert gpu.type() == C.device.DeviceKind.GPU
        assert gpu.id() == gpu_index
def test_set_excluded_devices():
    """Verify that an excluded device cannot be set as the default device,
    and that clearing the exclusion list makes it eligible again."""
    # Needs at least one GPU besides the CPU; otherwise nothing to test.
    if len(C.device.all_devices()) == 1:
        return
    assert C.try_set_default_device(C.cpu(), False)
    assert C.try_set_default_device(C.gpu(0), False)
    # With the CPU excluded, selecting it as default must fail ...
    C.set_excluded_devices([C.cpu()])
    assert not C.try_set_default_device(C.cpu(), False)
    # ... and succeed again once the exclusion list is emptied.
    C.set_excluded_devices([])
    assert C.try_set_default_device(C.cpu(), False)
def mpi_worker(working_dir, mb_source, gpu):
    """Per-rank BMUF training worker: seeds numpy with the MPI rank, trains on
    minibatches from *mb_source*, and logs progress every 50 minibatches."""
    rank = cntk.distributed.Communicator.rank()
    np.random.seed(rank)
    if gpu:
        # test with only one GPU
        cntk.try_set_default_device(cntk.gpu(0))
    # "ctf_frame" selects frame mode in the trainer
    bmuf = SimpleBMUFTrainer(mb_source == "ctf_frame")
    for count, minibatch in enumerate(get_minibatch(bmuf, working_dir, mb_source)):
        bmuf.trainer.train_minibatch(minibatch)
        if count % 50 == 0:
            bmuf.trainer.summarize_training_progress()
def test_set_gpu_as_default_device():
    """For each GPU: setting it as default without locking leaves it unlocked,
    and setting it with locking acquires the lock."""
    # Needs at least one GPU besides the CPU; otherwise nothing to test.
    if len(C.device.all_devices()) == 1:
        return
    # this will release any previous held device locks
    C.try_set_default_device(C.cpu(), False)
    for gpu_index in range(len(C.device.all_devices()) - 1):
        gpu = C.gpu(gpu_index)
        # Non-locking set: must succeed and leave the device unlocked.
        assert C.try_set_default_device(gpu, False)
        assert not is_locked(gpu)
        assert gpu == C.use_default_device()
        if not gpu.is_locked():
            assert not is_locked(gpu)
        # Locking set: must succeed and actually hold the lock.
        assert C.try_set_default_device(gpu, True)
        assert gpu == C.use_default_device()
        assert is_locked(gpu)
def distributed_worker(outdir, gpu, mode, config):
    """Per-rank distributed-training worker.

    Trains NUM_BATCHES minibatches of random indices, saving and immediately
    restoring a checkpoint after each one, then writes a final checkpoint (to
    force a sync after the last minibatch) and dumps the learned parameter
    value to ``<outdir>/<mode><rank>.npy`` for cross-rank comparison.
    """
    if gpu:
        # test with only one GPU
        C.try_set_default_device(C.gpu(0))
    else:
        # CPU sparse aggregation is not implemented, so turn it off
        # note we only need to explicitly do this when running with CPU device on a GPU build
        # For CPU build it's disabled by default
        C.cntk_py.use_sparse_gradient_aggregation_in_data_parallel_sgd(False)
    trainer = SimpleTrainer(mode, config)
    for batch in range(NUM_BATCHES):
        # Deterministic per-(rank, batch) data so runs are reproducible.
        set_np_random_seed(C.Communicator.rank(), batch)
        # BUGFIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin int is the documented drop-in replacement (same dtype).
        indices = (np.random.random((BATCH_SIZE_PER_WORKER,)) * (trainer.input_dim - 1)).astype(int)
        trainer.train_minibatch(indices)
        # Round-trip a checkpoint every batch to exercise save/restore.
        checkpoint_file = os.path.join(outdir, mode + str(batch))
        trainer.trainer.save_checkpoint(checkpoint_file)
        trainer.trainer.restore_from_checkpoint(checkpoint_file)
    # save a checkpoint to force sync after last minibatch
    trainer.trainer.save_checkpoint(os.path.join(outdir, mode + '_last'))
    np.save(os.path.join(outdir, mode + str(C.Communicator.rank())), trainer.p.value)
def test_all_devices():
    """The device list is non-empty, contains the CPU, and — when more than
    one device is present — also contains GPU 0."""
    devices = C.device.all_devices()
    assert len(devices) > 0
    assert C.cpu() in devices
    if len(devices) > 1:
        assert C.gpu(0) in devices
def is_locked_cross_process(queue, device_id):
    """Report a device's lock state from a child process by putting it on
    *queue*; a negative *device_id* selects the CPU, otherwise that GPU."""
    if device_id < 0:
        target = C.cpu()
    else:
        target = C.gpu(device_id)
    queue.put(target.is_locked())
def load_cnn_model(fn, gpu_id=0):
    """Load a CNTK model from file *fn*, defaulting computation to GPU *gpu_id*."""
    # NOTE(review): return value of try_set_default_device is ignored, so a
    # failure to acquire the GPU passes silently — confirm this is intended.
    cntk.try_set_default_device(cntk.gpu(gpu_id))
    # NOTE(review): result unused — presumably called only to finalize the
    # default-device choice before loading; verify against CNTK device docs.
    cntk.use_default_device()
    return cntk.load_model(fn)