def test_compress_decompress():
    """Round-trip a 100x100x100 ramp through ``compress``/``decompress``.

    The array is compressed with a ``tolerance`` keyword and decompressed
    with the same value. The recovered array must have the original shape
    and agree with the input under ``np.allclose``'s default tolerances.
    """
    tolerance = 0.0000001
    original = np.linspace(0, 100, num=1000000).reshape((100, 100, 100))

    payload = compress(original, tolerance=tolerance)
    roundtrip = decompress(payload, original.shape, original.dtype,
                           tolerance=tolerance)

    assert original.shape == roundtrip.shape
    assert np.allclose(original, roundtrip)
def test_dim_order():
    """Check which part of an (8, 4) array the head of the stream encodes.

    An (8, 4) float32 array holding 0..31 is compressed with ``rate=8``.
    Only the first 16 bytes of the result are decoded, as a (4, 4) float32
    array, and they must reproduce the values 0..15 laid out as (4, 4).
    """
    rate = 8
    full = np.arange(32, dtype=np.float32).reshape((8, 4))
    expected_head = np.arange(16, dtype=np.float32).reshape((4, 4))

    stream = compress(full, rate=rate)
    head = decompress(stream[0:16], (4, 4), np.dtype('float32'), rate=rate)

    assert np.allclose(head, expected_head)
def run_forward_error(filename, space_order=4, kernel='OT4', tolerance=1e-6,
                      nbpml=10, dtype=np.float64, **kwargs):
    """Propagate a wavefield from an exact and from a lossy restart state.

    The solver is run for half of its time axis as a warm-up. The resulting
    field is then compressed and decompressed at ``tolerance``. Finally the
    second half of the time axis is propagated twice: once starting from a
    copy of the exact field and once from a copy of the lossy field.

    As visible here, the function returns nothing and does not compare the
    two final fields.

    Parameters
    ----------
    filename : forwarded to ``overthrust_setup``.
    space_order, kernel, nbpml, dtype, **kwargs : forwarded to
        ``overthrust_setup``.
    tolerance : tolerance passed to both ``compress`` and ``decompress``.
    """
    # Setup solver
    solver = overthrust_setup(filename=filename, tn=2000, nbpml=nbpml,
                              space_order=space_order, kernel=kernel,
                              dtype=dtype, **kwargs)

    def new_wavefield():
        # All wavefields in this routine are created with identical arguments.
        return TimeFunction(name='u', grid=solver.model.grid, time_order=2,
                            space_order=solver.space_order)

    nt = solver.geometry.time_axis.num
    nt_2 = int(floor(nt / 2))

    # Warm up: run the first half of the time axis.
    rec, u, profiler = solver.forward(time=nt_2)

    # Target for the decompressed (lossy) copy of the warm-up result.
    u_comp = new_wavefield()
    u_comp.data  # Force memory allocation

    # Compress-decompress with the given tolerance, writing in place into
    # the storage obtained from ``get_data(u_comp)``.
    compressed_u = compress(get_data(u), tolerance=tolerance, parallel=True)
    mem = get_data(u_comp)
    mem[:] = decompress(compressed_u, mem.shape, mem.dtype,
                        tolerance=tolerance)

    # Work on fresh symbols so the data held by ``u`` / ``u_comp`` is not
    # modified by the propagation below.
    u_copy = new_wavefield()
    u_copy.data[:] = u.data[:]

    # Uncompressed/reference propagation over the second half.
    _, u_original, _ = solver.forward(time_m=nt_2, time_M=nt, u=u_copy)

    # Lossy propagation over the second half.
    u_l_copy = new_wavefield()
    u_l_copy.data[:] = u_comp.data[:]
    _, u_lossy, _ = solver.forward(time_m=nt_2, time_M=nt, u=u_l_copy)
def run_forward_error(filename, space_order=4, kernel='OT4', tolerance=0.001,
                      nbpml=10, dtype=np.float64, **kwargs):
    """Record how a lossy restart state diverges as propagation continues.

    The solver is run twice for half of its time axis, and the two results
    are asserted to be ``np.allclose``. The warm-up field is then compressed
    and decompressed at ``tolerance``. For every ``i`` in ``range(nt_2)``
    the exact and the lossy fields are each propagated from ``nt_2`` to
    ``nt_2 + i``; the error metrics from ``get_all_errors`` are written to
    ``forward_prop_results.csv`` together with ``i`` and ``tolerance``.

    Parameters
    ----------
    filename : forwarded to ``overthrust_setup``.
    space_order, kernel, nbpml, dtype, **kwargs : forwarded to
        ``overthrust_setup``.
    tolerance : tolerance passed to both ``compress`` and ``decompress``;
        also stored in the ``'atol'`` result column.
    """
    # Setup solver
    solver = overthrust_setup(filename=filename, tn=1000, nbpml=nbpml,
                              space_order=space_order, kernel=kernel,
                              dtype=dtype, **kwargs)

    def new_wavefield():
        # All wavefields in this routine are created with identical arguments.
        return TimeFunction(name='u', grid=solver.model.grid, time_order=2,
                            space_order=solver.space_order)

    # Run for nt/2 timesteps as a warm up; do it twice and require both
    # runs to agree before using the result as a reference.
    nt = solver.geometry.time_axis.num
    nt_2 = int(floor(nt/2))
    print("first run")
    rec, u, profiler = solver.forward(time=nt_2)
    print("second run")
    _, u2, _ = solver.forward(time=nt_2)
    assert np.allclose(u.data, u2.data)

    # Target for the decompressed (lossy) copy of the warm-up result.
    u_comp = new_wavefield()
    u_comp.data  # Force memory allocation

    # Compress-decompress with the given tolerance.
    compressed_u = compress(get_data(u), tolerance=tolerance, parallel=True)
    mem = get_data(u_comp)
    mem[:] = decompress(compressed_u, mem.shape, mem.dtype,
                        tolerance=tolerance)

    for i in range(nt_2):
        # Propagate i further steps from both the exact and the lossy state.
        clear_cache()

        u_copy = new_wavefield()
        u_copy.data[:] = u.data
        _, u_original, _ = solver.forward(time_m=nt_2, time_M=nt_2+i,
                                          u=u_copy)

        u_l_copy = new_wavefield()
        u_l_copy.data[:] = u_comp.data
        _, u_lossy, _ = solver.forward(time_m=nt_2, time_M=nt_2+i,
                                       u=u_l_copy)

        # Compare and report error metrics
        data = get_all_errors(get_data(u_original), get_data(u_lossy))
        data['ntimesteps'] = i
        data['atol'] = tolerance
        write_results(data, "forward_prop_results.csv")
def pyzfp_decompress(c_bitstring, block_size):
    """Decode ``c_bitstring`` into a 1-D float32 array of ``block_size`` items.

    The call to ``pyzfp.decompress`` always uses ``tolerance=1e-5``.
    """
    out_shape = (block_size, )
    out_dtype = np.dtype(np.float32)
    return pyzfp.decompress(c_bitstring, out_shape, out_dtype, tolerance=1e-5)
def zfp_decompress(params, indata):
    """Decompress a ``CompressedObject`` with ``pyzfp``.

    ``indata`` supplies the compressed payload (``.data``) and the target
    ``.shape`` and ``.dtype``; ``params`` is expanded as keyword arguments
    to ``pyzfp.decompress``. An ``AssertionError`` is raised if ``indata``
    is not a ``CompressedObject``.
    """
    assert isinstance(indata, CompressedObject)
    payload, shape, dtype = indata.data, indata.shape, indata.dtype
    return pyzfp.decompress(payload, shape, dtype, **params)
# Sweep zfp tolerances over a field stored in an HDF5 file, recording the
# compression factor and every metric in ``error_metrics`` for each one.
filename = args.filename
plot = args.plot

# Read the dataset fully into memory, then close the file: ``[()]`` and
# ``astype`` both produce in-memory arrays, so the handle is not needed
# afterwards and is no longer left open.
with h5py.File(filename, 'r') as f:
    field = f['data'][()].astype(np.float64)

# Tolerances 10**0 down to 10**-16.
tolerances = [10**x for x in range(0, -17, -1)]
error_to_plot = []
for atol in tolerances:
    print("Compressing at tolerance %s" % str(atol))
    compressed = pyzfp.compress(field, tolerance=atol)
    decompressed = pyzfp.decompress(compressed, shape=field.shape,
                                    dtype=field.dtype, tolerance=atol)

    computed_errors = {}
    # Compression factor = uncompressed bytes / compressed bytes.
    # ``nbytes`` gives the same count as ``len(field.tostring())`` without
    # copying the whole array or using the deprecated ``tostring``.
    computed_errors['cf'] = field.nbytes / float(len(compressed))
    for k, v in error_metrics.items():
        computed_errors[k] = v(field, decompressed)

    error_function = error_metrics[plot]
    error_to_plot.append(computed_errors[plot])
    computed_errors['tolerance'] = atol
    write_results(computed_errors, 'direct_compression_results.csv')

plt.xscale('log')
plt.yscale('log')
# Benchmark compress/decompress over a range of tolerances and print one
# CSV-style row per tolerance.

# Read the dataset fully into memory, then close the file; the handle is
# not needed once the float64 copy exists.
with h5py.File(filename, 'r') as f:
    uncompressed = f['data'][()].astype(np.dtype('float64'))

print(
    "\"Size of compressed field\", \"Compression Factor\", \"Compression time\", \"Decompression time\", \"Tolerance\", \"Error norm\", \"Maximum error\""
)
for p_i in range(0, 16):
    tolerance = 0.1**p_i

    with Timer(factor=1000) as t:
        if compressor == "zfp":
            kwargs = {'parallel': parallel, 'tolerance': tolerance}
        else:
            kwargs = {'tolerance': tolerance}
        compressed = compress(uncompressed, **kwargs)

    with Timer(factor=1000) as t2:
        if compressor == "zfp":
            kwargs = {'parallel': parallel, 'tolerance': tolerance}
        else:
            # Non-zfp decompressors are called without extra keywords.
            kwargs = {}
        decompressed = decompress(compressed, uncompressed.shape,
                                  uncompressed.dtype, **kwargs)

    error_matrix = decompressed - uncompressed

    # NOTE(review): with range(0, 16) the value 16 is never reached, so only
    # p_i == 0 and p_i == 8 are dumped — confirm whether 16 was intended.
    if p_i in (0, 8, 16):
        to_hdf5(error_matrix, "error_field-%s-%d.h5" % (compressor, p_i))

    # "Maximum error" is the largest absolute deviation. Taking the max of
    # the signed errors would under-report it whenever the largest error
    # is negative.
    max_abs_error = np.max(np.abs(error_matrix))

    # ``nbytes`` equals ``len(uncompressed.tostring())`` without the copy
    # or the deprecated call.
    print("%f, %f, %f, %f, %.16f, %f, %f" %
          (len(compressed), uncompressed.nbytes / float(len(compressed)),
           t.elapsed, t2.elapsed, tolerance, np.linalg.norm(error_matrix),
           max_abs_error))
def reconstruct_twix(infile, outfile=None):
    """Write the binary file described by the HDF5 container ``infile``.

    The container is opened with PyTables. For every scan listed in the root
    attribute ``scanlist`` the header string, each MDH record and its data
    are written to ``outfile``, followed by zero padding up to the next
    512-byte boundary. Finally the multi-file header at the start of the
    output is filled in with the number of scans and each scan's offset and
    length. Elapsed time is printed when done.

    Presumably the output is a Siemens twix raw-data file (judging by the
    function name and the MDH / MultiRaidFileHeader names) — confirm.

    Parameters
    ----------
    infile : path of the HDF5 file to read.
    outfile : path of the file to write. If ``None``, the root attribute
        ``original_filename`` stored in ``infile`` is used.
    """
    #wip: function takes no parameters, all necessary information needs to be included in hdf file
    def write_sync_bytes(f):
        # Pad with zero bytes so the file position becomes a multiple of 512.
        syncbytes = (512 - f.tell() % 512) % 512
        # print('syncbytes', syncbytes)
        f.write(b'\x00' * syncbytes)

    if outfile is None:
        # Fall back to the file name recorded inside the container.
        with tables.open_file(infile, mode="r") as f:
            outfile = f.root._v_attrs.original_filename

    t_start = time.time()
    with tables.open_file(infile, mode="r") as f, open(outfile, 'wb') as fout:
        cc_mode = f.root._v_attrs.cc_mode
        zfp = f.root._v_attrs.zfp
        # Negative stored values are mapped to None before being handed to
        # pyzfp (presumably meaning "option not used" — confirm).
        zfp_tol = f.root._v_attrs.zfp_tol
        if zfp_tol < 0:
            zfp_tol = None
        zfp_prec = f.root._v_attrs.zfp_prec
        if zfp_prec < 0:
            zfp_prec = None

        inv_mtx = None
        if cc_mode is not False and hasattr(f.root, 'mtx'):
            mtx = f.root.mtx[()]
            if cc_mode == 'scc' or cc_mode == 'gcc':
                # Pseudo-invert each matrix along the first axis; the result
                # has axes 1 and -1 swapped relative to mtx.
                inv_mtx = np.zeros_like(mtx).swapaxes(1, -1)
                for x in range(mtx.shape[0]):
                    inv_mtx[x, :, :] = np.linalg.pinv(mtx[x, :, :])
            else:
                # do not invert BART mtx
                inv_mtx = mtx.copy()
            del (mtx)

        # allocate space for multi-header
        fout.write(b'\x00' * 10240)

        scan_pos = list()
        scan_len = list()
        scanlist = f.root._v_attrs.scanlist
        for scan in scanlist:
            # keep track of byte pos
            scan_pos.append(fout.tell())

            # write header
            getattr(f.root, scan).hdr_str[()].tofile(fout)

            for mdh_key, raw_info in enumerate(getattr(f.root, scan).info[()]):
                info = np.frombuffer(raw_info, dtype=datinfo_type)[0]

                # write mdh
                mdh = info['mdh_info']
                mdh.tofile(fout)
                rm_os_active = info['rm_os_active']
                cc_active = info['cc_active']

                # write data
                is_bytearray = mdh_def.is_flag_set(
                    mdh, 'ACQEND') or mdh_def.is_flag_set(mdh, 'SYNCDATA')
                if is_bytearray:
                    # ACQEND / SYNCDATA entries are copied through unchanged.
                    data = getattr(f.root, scan).DATA[mdh_key]
                    data.tofile(fout)
                else:
                    n_sampl = mdh['ushSamplesInScan']
                    n_coil = mdh['ushUsedChannels']

                    # Dimensions of the *stored* data: half the samples when
                    # rm_os_active is set (presumably oversampling removal —
                    # confirm), and a coil count taken from the matrix or
                    # the ``ncc`` attribute when coil compression is active.
                    n_data_sampl = n_sampl
                    if rm_os_active:
                        n_data_sampl //= 2
                    n_data_coils = n_coil
                    if cc_mode and cc_active:
                        n_data_coils = inv_mtx.shape[-1]
                        if cc_mode == 'scc_bart' or cc_mode == 'gcc_bart':
                            n_data_coils = f.root._v_attrs.ncc

                    data = getattr(f.root, scan).DATA[mdh_key]
                    if zfp:
                        # Stored bytes are a zfp stream of float32 values:
                        # two floats per complex sample, reinterpreted as
                        # complex64 after decompression.
                        data = np.frombuffer(data, dtype='uint8')
                        data = memoryview(data)
                        data = pyzfp.decompress(
                            data, [n_data_coils * 2 * n_data_sampl],
                            np.dtype('float32'),
                            tolerance=zfp_tol,
                            precision=zfp_prec)
                        data = np.ascontiguousarray(data).view('complex64')
                    data = data.reshape(n_data_coils, n_data_sampl)

                    # expand_data is defined elsewhere; afterwards the data
                    # is reshaped to one row per used channel.
                    if cc_mode and cc_active:
                        data = expand_data(data,
                                           mdh,
                                           rm_os_active,
                                           cc_mode=cc_mode,
                                           inv_mtx=inv_mtx)
                    else:
                        data = expand_data(data,
                                           mdh,
                                           rm_os_active,
                                           cc_mode=False)

                    data = data.reshape((n_coil, -1))
                    coil_hdr = info['coil_info']
                    buffer = bytes()
                    for cha, cha_id in enumerate(
                            info['coil_list'][:data.shape[0]]):
                        #write channel id to buffer
                        coil_hdr['ulChannelId'] = cha_id
                        buffer += coil_hdr.tobytes()
                        # write data to buffer
                        buffer += data[cha].tobytes()
                    # write buffer to file
                    fout.write(buffer)

            # update scan_len
            scan_len.append(fout.tell() - scan_pos[-1])

            # add sync bytes between scans
            write_sync_bytes(fout)

        # now write preallocated MultiRaidFileHeader
        n_scans = len(scan_pos)
        multi_header = np.frombuffer(f.root.multi_header[()],
                                     hdr_def.MultiRaidFileHeader)[0]
        # write NScans
        multi_header['hdr']['count_'] = n_scans
        # write scan_pos & scan_len for each scan
        for i, (pos_, len_) in enumerate(zip(scan_pos, scan_len)):
            # print('scan', i, ' len_ old: ', multi_header['entry'][i]['len_'], ' new:', len_)
            # print('scan', i, ' off_ old: ', multi_header['entry'][i]['off_'], ' new:', pos_)
            multi_header['entry'][i]['len_'] = len_
            multi_header['entry'][i]['off_'] = pos_

        # write MultiRaidFileHeader into the space reserved at offset 0
        fout.seek(0)
        multi_header.tofile(fout)

    elapsed_time = (time.time() - t_start)
    print("decompression finished in %d:%02d:%02d h" %
          (elapsed_time // 3600,
           (elapsed_time % 3600) // 60, elapsed_time % 60))
# Fill the compression buffer, one slot per entry of ``lines_to_read``.
# NOTE(review): the loop variable ``line`` is not used — every slot is read
# from ``segyfile.xlines[LINE_NO]``. Confirm whether ``line`` was intended.
for i, line in enumerate(lines_to_read):
    lines_to_compress[i, :, :] = segyfile.xline[segyfile.xlines[LINE_NO]]

# Compress and decompress the buffer at several fixed bit rates, keeping one
# transposed 2-D slice per rate.
bitrates = [4, 2, 1]
decompressed_slices = {}
for bits_per_voxel in bitrates:
    # Zero-pad to the sizes returned by ``pad``: axis 1 with argument 4 and
    # axis 2 with argument ``2048 // bits_per_voxel``.
    padded_shape = (
        4,
        pad(lines_to_compress.shape[1], 4),
        pad(lines_to_compress.shape[2], 2048 // bits_per_voxel),
    )
    data_padded = np.zeros(padded_shape, dtype=np.float32)
    data_padded[:4,
                :lines_to_compress.shape[1],
                :lines_to_compress.shape[2]] = lines_to_compress

    compressed = compress(data_padded, rate=bits_per_voxel)
    decompressed = decompress(compressed, padded_shape, np.dtype('float32'),
                              rate=bits_per_voxel)

    # Crop the padding away again and transpose the selected slice.
    cropped = decompressed[LINE_NO % 4,
                           0:slice_segy.shape[1],
                           0:slice_segy.shape[0]]
    decompressed_slices[bits_per_voxel] = cropped.T

# Map amplitudes clipped to [-CLIP, CLIP] onto [0, 1] for the colormap.
CLIP = 45000.0
SCALE = 1.0 / (2.0 * CLIP)

from PIL import Image

normalised = (slice_segy.clip(-CLIP, CLIP) + CLIP) * SCALE
rgba = cm.seismic(normalised) * 255
im = Image.fromarray(np.uint8(rgba))
im.save(os.path.join(outpath, 'test_inline-orig.png'))