def test_holo_reconstruction(lt_ctx, backend):
    """Reconstruct synthetic holograms and compare the phase with the input.

    A stack of phase images is turned into hologram frames with
    ``hologram_frame``; a second stack with zero phase serves as reference.
    Both stacks are run through ``HoloReconstructUDF`` and the angle of the
    ratio of the two reconstructed waves is compared with the input phase
    inside the central crop of the signal dimensions.

    For ``backend == 'cupy'`` the test is skipped if no CUDA device or no
    CuPy is detected; otherwise the first detected CUDA device is selected
    for the UDF runs and the CPU setting is restored afterwards.
    """
    if backend == 'cupy':
        d = detect()
        # Reuse the detection result instead of calling detect() a second time
        cudas = d['cudas']
        if not cudas or not d['has_cupy']:
            pytest.skip("No CUDA device or no CuPy, skipping CuPy test")

    # Prepare image parameters and mesh
    nx, ny = (5, 7)
    sx, sy = (64, 64)
    slice_crop = (slice(None),
                  slice(None),
                  slice(sx // 4, sx // 4 * 3),
                  slice(sy // 4, sy // 4 * 3))

    lnx = np.arange(nx)
    lny = np.arange(ny)
    lsx = np.arange(sx)
    lsy = np.arange(sy)

    mnx, mny, msx, msy = np.meshgrid(lnx, lny, lsx, lsy)

    # Prepare phase image
    phase_ref = np.pi * msx * (mnx.max() - mnx) * mny / sx**2 \
        + np.pi * msy * mnx * (mny.max() - mny) / sy**2

    # Generate holograms
    holo = np.zeros_like(phase_ref)
    ref = np.zeros_like(phase_ref)

    for i in range(nx):
        for j in range(ny):
            holo[j, i, :, :] = hologram_frame(np.ones((sx, sy)), phase_ref[j, i, :, :])
            ref[j, i, :, :] = hologram_frame(np.ones((sx, sy)), np.zeros((sx, sy)))

    # Prepare LT datasets and do reconstruction
    dataset_holo = MemoryDataSet(data=holo, tileshape=(ny, sx, sy),
                                 num_partitions=2, sig_dims=2)

    dataset_ref = MemoryDataSet(data=ref, tileshape=(ny, sx, sy),
                                num_partitions=1, sig_dims=2)

    sb_position = [11, 6]
    sb_size = 6.26498204

    holo_job = HoloReconstructUDF(out_shape=(sx, sy),
                                  sb_position=sb_position,
                                  sb_size=sb_size)
    try:
        if backend == 'cupy':
            set_use_cuda(cudas[0])
        w_holo = lt_ctx.run_udf(dataset=dataset_holo, udf=holo_job)['wave'].data
        w_ref = lt_ctx.run_udf(dataset=dataset_ref, udf=holo_job)['wave'].data
    finally:
        # Always switch back to CPU so the device selection does not leak
        set_use_cpu(0)

    w = w_holo / w_ref

    phase = np.angle(w)

    assert np.allclose(phase_ref[slice_crop], phase[slice_crop], rtol=0.12)
def test_multi_mask_stack_force_dense(lt_ctx, backend):
    """Apply a sparse stack of two masks with ``use_sparse=False``.

    The result of the mask analysis is compared with ``_naive_mask_apply``
    on the same data. For ``backend == 'cupy'`` the test is skipped if no
    CUDA device or no CuPy is detected; otherwise the first detected CUDA
    device is selected and the CPU setting is restored afterwards.
    """
    if backend == 'cupy':
        d = detect()
        # Reuse the detection result instead of calling detect() a second time
        cudas = d['cudas']
        if not cudas or not d['has_cupy']:
            pytest.skip("No CUDA device or no CuPy, skipping CuPy test")
    try:
        if backend == 'cupy':
            set_use_cuda(cudas[0])
        data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
        masks = sparse.COO.from_numpy(_mk_random(size=(2, 16, 16)))
        expected = _naive_mask_apply(masks, data)

        dataset = MemoryDataSet(data=data, tileshape=(4 * 4, 4, 4), num_partitions=2)
        analysis = lt_ctx.create_mask_analysis(
            dataset=dataset,
            factories=lambda: masks,
            use_sparse=False,
            mask_count=2,
        )
        results = lt_ctx.run(analysis)

        assert np.allclose(
            results.mask_0.raw_data,
            expected[0],
        )
        assert np.allclose(
            results.mask_1.raw_data,
            expected[1],
        )
    finally:
        # Always switch back to CPU so the device selection does not leak
        set_use_cpu(0)
def test_sig_slice(lt_ctx, backend, udf_class, tileshape, success):
    """Run ``udf_class`` on a dataset tiled with ``tileshape``.

    If ``success`` is true, the ``'checksum'`` result must match the sum
    over the two signal axes of the input data; otherwise running the UDF
    is expected to raise. The arguments are presumably supplied by fixtures
    or parametrization defined outside this function.

    For ``backend == 'cupy'`` the test is skipped if no CUDA device or no
    CuPy is detected; otherwise the first detected CUDA device is selected
    and the CPU setting is restored afterwards.
    """
    if backend == 'cupy':
        d = detect()
        # Reuse the detection result instead of calling detect() a second time
        cudas = d['cudas']
        if not cudas or not d['has_cupy']:
            pytest.skip("No CUDA device or no CuPy, skipping CuPy test")

    data = _mk_random(size=(30, 3, 7), dtype="float32")
    ref_res = data.sum(axis=(-1, -2))
    dataset = MemoryDataSet(data=data, tileshape=tileshape,
                            num_partitions=2, sig_dims=2)
    try:
        if backend == 'cupy':
            set_use_cuda(cudas[0])
        udf = udf_class()
        if success:
            res = lt_ctx.run_udf(udf=udf, dataset=dataset)
            assert np.allclose(res['checksum'].raw_data, ref_res)
        else:
            with pytest.raises(Exception):
                lt_ctx.run_udf(udf=udf, dataset=dataset)
    finally:
        # Always switch back to CPU so the device selection does not leak
        set_use_cpu(0)
def test_run_cuda(lt_ctx, mask_cupy):
    """Run ``DebugDeviceUDF`` with the 'cuda' backend while CuPy is masked.

    CUDA device 23 is selected inside a patched environment. The test checks
    the device IDs reported by the UDF, that the device settings are back to
    their previous values after the patch is left, and that the UDF result
    equals the sum of the data over the first two axes.
    """
    # The cupy module is set to None in mask_cupy fixture so that
    # any use of it will raise an error
    with pytest.raises(ModuleNotFoundError):
        import cupy  # NOQA: F401

    data = _mk_random(size=(16, 16, 16, 16))
    dataset = lt_ctx.load("memory", data=data)

    # Snapshot of the device settings before anything is changed
    cpu_before = bae.get_use_cpu()
    cuda_before = bae.get_use_cuda()
    device_class_before = bae.get_device_class()

    with mock.patch.dict(os.environ, {'LIBERTEM_USE_CUDA': "23"}):
        # This should set the same environment variable as the mock above
        # so that it will be unset after the "with"
        bae.set_use_cuda(23)
        debug_udf = DebugDeviceUDF(backends=('cuda', 'numpy'))
        res = lt_ctx.run_udf(udf=debug_udf, dataset=dataset)

    reported_ids = res['device_id'].data[0]
    for val in reported_ids.values():
        print(val)
        assert val['cpu'] is None
        assert val['cuda'] == 23

    # We make sure that the mocking was successful, i.e.
    # restored the previous state
    assert cpu_before == bae.get_use_cpu()
    assert cuda_before == bae.get_use_cuda()
    assert device_class_before == bae.get_device_class()

    assert np.all(res['device_class'].data == 'cuda')
    expected_sum = data.sum(axis=(0, 1))
    assert np.allclose(res['on_device'].data, expected_sum)
def test_run_cupy(lt_ctx, mock_cupy):
    """Run ``DebugDeviceUDF`` with the 'cupy' backend using a mocked CuPy.

    CUDA device 23 is selected inside a patched environment and
    ``numpy.cuda`` is patched in for the duration of the UDF run. The test
    checks the device IDs reported by the UDF, that the device settings are
    back to their previous values afterwards, and that the UDF result equals
    the sum of the data over the first two axes.
    """
    data = _mk_random(size=(16, 16, 16, 16))
    dataset = lt_ctx.load("memory", data=data)

    # Snapshot of the device settings before anything is changed
    cpu_before = bae.get_use_cpu()
    cuda_before = bae.get_use_cuda()
    device_class_before = bae.get_device_class()

    with mock.patch.dict(os.environ, {'LIBERTEM_USE_CUDA': "23"}):
        # This should set the same environment variable as the mock above
        # so that it will be unset after the "with"
        bae.set_use_cuda(23)
        # add `numpy.cuda` so we can make `numpy` work as a mock replacement for `cupy`
        with mock.patch('numpy.cuda', return_value=MockCuda, create=True):
            debug_udf = DebugDeviceUDF(backends=('cupy', 'numpy'))
            res = lt_ctx.run_udf(udf=debug_udf, dataset=dataset)

    reported_ids = res['device_id'].data[0]
    for val in reported_ids.values():
        assert val['cpu'] is None
        assert val['cuda'] == 23

    # We make sure that the mocking was successful, i.e.
    # restored the previous state
    assert cpu_before == bae.get_use_cpu()
    assert cuda_before == bae.get_use_cuda()
    assert device_class_before == bae.get_device_class()

    assert np.all(res['device_class'].data == 'cuda')
    expected_sum = data.sum(axis=(0, 1))
    assert np.allclose(res['on_device'].data, expected_sum)
def test_noncontiguous_tiles(lt_ctx, backend):
    """Run ``ReshapedViewUDF`` on a dataset with tileshape ``(3, 2, 2)``.

    The UDF is run through the context, and additionally on the first
    partition through ``UDFRunner`` with ``debug=True``. Finally the
    ``"sigbuf"`` result of the context run must be all ones.

    For ``backend == 'cupy'`` the test is skipped if no CUDA device or no
    CuPy is detected; otherwise the first detected CUDA device is selected
    and the CPU setting is restored afterwards.
    """
    if backend == 'cupy':
        d = detect()
        # Reuse the detection result instead of calling detect() a second time
        cudas = d['cudas']
        if not cudas or not d['has_cupy']:
            pytest.skip("No CUDA device or no CuPy, skipping CuPy test")

    data = _mk_random(size=(30, 3, 7), dtype="float32")
    dataset = MemoryDataSet(data=data, tileshape=(3, 2, 2),
                            num_partitions=2, sig_dims=2)
    try:
        if backend == 'cupy':
            set_use_cuda(cudas[0])
        udf = ReshapedViewUDF()
        res = lt_ctx.run_udf(udf=udf, dataset=dataset)
        partition = next(dataset.get_partitions())
        p_udf = udf.copy_for_partition(partition=partition, roi=None)
        # Enabling debug=True checks for disjoint cache keys
        UDFRunner([p_udf], debug=True).run_for_partition(
            partition=partition,
            roi=None,
            corrections=None,
            env=Environment(threads_per_worker=1),
        )
    finally:
        # Always switch back to CPU so the device selection does not leak
        set_use_cpu(0)

    assert np.all(res["sigbuf"].data == 1)
def test_ssb(dpix, backend, n_threads):
    """Compare ``SSB_UDF`` (method 'subpix') with ``reference_ssb``.

    Random input data is processed by the UDF on an inline executor with
    ``n_threads`` inline threads. The ``'fourier'`` result must match the
    reference result, and must equal the 2D FFT of the wave rebuilt from the
    ``'amplitude'`` and ``'phase'`` results. CUDA device 0 is selected for
    ``backend == 'cupy'`` and reset to CPU afterwards.
    """
    lt_ctx = lt.Context(InlineJobExecutor(debug=True, inline_threads=n_threads))
    use_cupy = backend == 'cupy'
    try:
        if use_cupy:
            set_use_cuda(0)

        dtype = np.float64
        scaling = 4
        shape = (29, 30, 189 // scaling, 197 // scaling)

        # The acceleration voltage U in keV
        U = 300
        lamb = wavelength(U)
        # STEM semiconvergence angle in radians
        semiconv = 25e-3
        # Diameter of the primary beam in the diffraction pattern in pixels
        semiconv_pix = 78.6649 / scaling

        # Keyword arguments shared by the UDF and the reference implementation
        geometry = dict(
            dpix=dpix,
            semiconv=semiconv,
            semiconv_pix=semiconv_pix,
            cy=93 // scaling,
            cx=97 // scaling,
        )

        # Uniform random values multiplied with a linear ramp
        n_values = np.prod(shape)
        noise = np.random.uniform(0, 1, n_values)
        ramp = np.linspace(1.0, 1000.0, num=n_values)
        input_data = (noise * ramp).astype(np.float64).reshape(shape)

        udf = SSB_UDF(lamb=lamb, dtype=dtype, method='subpix', **geometry)

        dataset = MemoryDataSet(
            data=input_data,
            tileshape=(20,) + shape[2:],
            num_partitions=2,
            sig_dims=2,
        )

        result = lt_ctx.run_udf(udf=udf, dataset=dataset)

        result_f, reference_masks = reference_ssb(input_data, U=U, **geometry)

        udf_masks = udf.get_task_data()['masks'].computed_masks

        # Use symmetry and reshape like generate_masks()
        half_y = shape[0] // 2 + 1
        reference_masks = reference_masks[:half_y].reshape(
            (half_y * shape[1],) + shape[2:]
        )

        fourier = result['fourier'].data
        print(np.max(np.abs(udf_masks.todense() - reference_masks)))
        print(np.max(np.abs(fourier - result_f)))

        assert np.allclose(fourier, result_f)
        backwards = result['amplitude'].data**2 * np.exp(1j*result['phase'].data)
        assert np.allclose(fourier, np.fft.fft2(backwards))
    finally:
        if use_cupy:
            set_use_cpu(0)
def worker_setup(resource, device):
    """Prepare a worker process for the given compute resource.

    Ignores SIGINT in the worker and then selects the device via
    ``set_use_cuda`` or ``set_use_cpu``.

    Parameters
    ----------
    resource
        ``"CUDA"`` or ``"CPU"``.
    device
        Device identifier passed on to ``set_use_cuda`` / ``set_use_cpu``.

    Raises
    ------
    ValueError
        If ``resource`` is neither ``"CUDA"`` nor ``"CPU"``.
    """
    # Disable handling Ctrl-C on the workers for a local cluster
    # since the nanny restarts workers in that case and that gets mixed
    # with Ctrl-C handling of the main process, at least on Windows
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    if resource == "CUDA":
        set_use_cuda(device)
    elif resource == "CPU":
        set_use_cpu(device)
    else:
        # Exceptions do not interpolate logging-style arguments, so the
        # message has to be formatted before it is passed to ValueError.
        raise ValueError(
            "Unknown resource %s, use 'CUDA' or 'CPU'" % (resource,)
        )
def test_udf_noncontiguous_tiles(lt_ctx, backend, benchmark):
    """Benchmark ``NoopSigUDF`` on a dataset with tileshape ``(3, 2, 2)``.

    ``lt_ctx.run_udf`` is invoked through the ``benchmark`` callable, and
    the ``"sigbuf"`` result must be all zeros.

    For ``backend == 'cupy'`` the test is skipped if no CUDA device or no
    CuPy is detected; otherwise the first detected CUDA device is selected
    and the CPU setting is restored afterwards.
    """
    if backend == 'cupy':
        d = detect()
        # Reuse the detection result instead of calling detect() a second time
        cudas = d['cudas']
        if not cudas or not d['has_cupy']:
            pytest.skip("No CUDA device or no CuPy, skipping CuPy test")

    data = np.zeros(shape=(30, 3, 256), dtype="float32")
    dataset = MemoryDataSet(data=data, tileshape=(3, 2, 2),
                            num_partitions=2, sig_dims=2)
    try:
        if backend == 'cupy':
            set_use_cuda(cudas[0])
        udf = NoopSigUDF()
        res = benchmark(lt_ctx.run_udf, udf=udf, dataset=dataset)
    finally:
        # Always switch back to CPU so the device selection does not leak
        set_use_cpu(0)

    assert np.all(res["sigbuf"].data == 0)
def test_multi_masks(lt_ctx, TYPE, backend):
    """Apply a dense, a scipy.sparse CSR and a ``sparse.COO`` mask together.

    ``TYPE`` is assigned to the ``TYPE`` attribute of the analysis before it
    is run. Each mask result is compared with ``_naive_mask_apply`` on the
    same data.

    For ``backend == 'cupy'`` the test is skipped if no CUDA device or no
    CuPy is detected; otherwise the first detected CUDA device is selected
    and the CPU setting is restored afterwards.
    """
    if backend == 'cupy':
        d = detect()
        # Reuse the detection result instead of calling detect() a second time
        cudas = d['cudas']
        if not cudas or not d['has_cupy']:
            pytest.skip("No CUDA device or no CuPy, skipping CuPy test")
    try:
        if backend == 'cupy':
            set_use_cuda(cudas[0])
        data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
        mask0 = _mk_random(size=(16, 16))
        mask1 = sp.csr_matrix(_mk_random(size=(16, 16)))
        mask2 = sparse.COO.from_numpy(_mk_random(size=(16, 16)))
        expected = _naive_mask_apply([mask0, mask1, mask2], data)

        dataset = MemoryDataSet(data=data, tileshape=(4 * 4, 4, 4), num_partitions=2)
        analysis = lt_ctx.create_mask_analysis(
            dataset=dataset,
            factories=[lambda: mask0, lambda: mask1, lambda: mask2],
        )
        analysis.TYPE = TYPE
        results = lt_ctx.run(analysis)

        assert np.allclose(
            results.mask_0.raw_data,
            expected[0],
        )
        assert np.allclose(
            results.mask_1.raw_data,
            expected[1],
        )
        assert np.allclose(
            results.mask_2.raw_data,
            expected[2],
        )
    finally:
        # Always switch back to CPU so the device selection does not leak
        set_use_cpu(0)
def test_ssb_container(dpix, lt_ctx, backend):
    """Compare ``SSB_UDF`` with a supplied ``MaskContainer`` to ``reference_ssb``.

    Masks are created with ``generate_masks`` (method 'subpix'), wrapped in
    a ``MaskContainer`` and handed to the UDF. The ``'pixels'`` result must
    match the reference result. CUDA device 0 is selected for
    ``backend == 'cupy'`` and reset to CPU afterwards.
    """
    use_cupy = backend == 'cupy'
    try:
        if use_cupy:
            set_use_cuda(0)

        dtype = np.float64
        scaling = 4
        shape = (29, 30, 189 // scaling, 197 // scaling)

        # The acceleration voltage U in keV
        U = 300
        lamb = wavelength(U)
        # STEM semiconvergence angle in radians
        semiconv = 25e-3
        # Diameter of the primary beam in the diffraction pattern in pixels
        semiconv_pix = 78.6649 / scaling

        # Keyword arguments shared by mask generation, UDF and reference
        geometry = dict(
            dpix=dpix,
            semiconv=semiconv,
            semiconv_pix=semiconv_pix,
            cy=93 // scaling,
            cx=97 // scaling,
        )

        # Uniform random values multiplied with a linear ramp
        n_values = np.prod(shape)
        noise = np.random.uniform(0, 1, n_values)
        ramp = np.linspace(1.0, 1000.0, num=n_values)
        input_data = (noise * ramp).astype(np.float64).reshape(shape)

        masks = generate_masks(
            reconstruct_shape=shape[:2],
            mask_shape=shape[2:],
            dtype=dtype,
            lamb=lamb,
            method='subpix',
            **geometry
        )

        mask_container = MaskContainer(
            mask_factories=lambda: masks,
            dtype=masks.dtype,
            use_sparse='scipy.sparse.csc',
            count=masks.shape[0],
        )

        udf = SSB_UDF(lamb=lamb, dtype=dtype, mask_container=mask_container, **geometry)

        dataset = MemoryDataSet(
            data=input_data,
            tileshape=(20,) + shape[2:],
            num_partitions=2,
            sig_dims=2,
        )

        result = lt_ctx.run_udf(udf=udf, dataset=dataset)

        result_f, reference_masks = reference_ssb(input_data, U=U, **geometry)

        udf_masks = udf.get_task_data()['masks'].computed_masks

        # Use symmetry and reshape like generate_masks()
        half_y = shape[0] // 2 + 1
        reference_masks = reference_masks[:half_y].reshape(
            (half_y * shape[1],) + shape[2:]
        )

        pixels = result['pixels'].data
        print(np.max(np.abs(udf_masks.todense() - reference_masks)))
        print(np.max(np.abs(pixels - result_f)))

        assert np.allclose(pixels, result_f)
    finally:
        if use_cupy:
            set_use_cpu(0)