Exemplo n.º 1
0
def send_mm(sendSocket, frame, atol=None, compDataCache=None, key=None):
    """Send a numpy matrix over a socket as a length-prefixed zfp stream.

    The frame is compressed with ``zfpy.compress_numpy`` and transmitted as a
    4-byte big-endian unsigned length followed by the compressed bytes.
    If the main node is distributing the work matrices, a caching mechanism
    can be used so that data which was already compressed is not compressed
    again (e.g. when a matrix has to be re-sent after a worker node failed).

    Args:
        sendSocket (socket): The socket connection on which the data is being sent
        frame (ndarray): The numpy matrix being sent
        atol (float, optional): Absolute error tolerance, forwarded to zfpy as
                                its ``tolerance`` argument. When None, zfpy is
                                called without any compression parameter.
                                Defaults to None.
        compDataCache (dict, optional): A dictionary which represents a caching mechanism.
                                        It is only used when ``key`` is given as well.
                                        Defaults to None.
        key (any, optional): The key can generally be anything, the correct use of the key is left to the user
                                Defaults to None.

    Raises:
        TypeError: Raised if the data provided is not a numpy array
        Exception: Raised if the connection is broken during transmission of the data
    """
    if not isinstance(frame, np.ndarray):
        raise TypeError("input frame is not a valid numpy array")

    useCache = compDataCache is not None and key is not None

    # look for an already compressed copy; the cache is shared, so every
    # access to it happens while holding cacheMutex
    compFrame = None
    if useCache:
        with cacheMutex:
            if key in compDataCache:
                compFrame = compDataCache[key]

    if compFrame is None:
        # compress outside the lock so other senders are not blocked by it
        if atol is None:
            compFrame = zfpy.compress_numpy(frame)
        else:
            # zfpy names its absolute error bound `tolerance`; it has no
            # `atol` keyword
            compFrame = zfpy.compress_numpy(frame, tolerance=atol)
        if useCache:
            with cacheMutex:
                compDataCache[key] = compFrame

    data = struct.pack('>I', len(compFrame)) + compFrame

    # try to send all the data packet constructed
    try:
        sendSocket.sendall(data)
    except BrokenPipeError as err:
        # cacheMutex is not held at this point, so it must not be released
        # here: releasing an unlocked lock raises RuntimeError and would hide
        # the connection error
        raise Exception('Connection broken!') from err
Exemplo n.º 2
0
def zfpy_compress(typed_column):
    """
    Compress a column of numbers with the zfp codec (via zfpy).

    The column is first converted to a C-ordered float32 numpy array, so
    integer input is accepted but is compressed as floats. Returns whatever
    ``zfpy.compress_numpy`` produces for that array.
    """
    as_float32 = np.array(typed_column, dtype=np.float32, order='C')
    return zfpy.compress_numpy(as_float32)
Exemplo n.º 3
0
        def encode(self, buf):
            """Compress ``buf`` with zfp, always writing the stream header.

            The input is passed through ``ensure_contiguous_ndarray`` first;
            the codec's ``compression_kwargs`` are forwarded unchanged to
            ``compress_numpy``.
            """
            contiguous = ensure_contiguous_ndarray(buf)
            return _zfpy.compress_numpy(
                contiguous,
                write_header=True,
                **self.compression_kwargs
            )
Exemplo n.º 4
0
def consumer(queue, header, out_filename, bits_per_voxel):
    """Write ``header`` to ``out_filename``, then compress and append queued blocks.

    Each item taken from ``queue`` is compressed with zfp at a fixed rate of
    ``bits_per_voxel`` (without a per-block zfp header), appended to the file
    and acknowledged with ``queue.task_done()``.

    The loop has no exit condition of its own.
    NOTE(review): presumably meant to run in a daemon thread that ends with
    the process - confirm against the caller.
    """
    with open(out_filename, 'wb') as sink:
        sink.write(header)
        while True:
            block = queue.get()
            sink.write(
                zfpy.compress_numpy(block, rate=bits_per_voxel, write_header=False)
            )
            queue.task_done()
 def numpy_data_to_queue_data(self, numpy_data):
     """
     Convert numpy data to queue data that will be serialized through pyro.

     The result depends on ``self.compression_type``:

     * ``'lz4'``: tuple of (lz4-frame-compressed bytes, shape, dtype).
     * ``'zfp'``: only the zfp-compressed bytes, NOT a tuple.
       NOTE(review): presumably the zfp stream header carries shape and
       dtype, so the decoder does not need them - confirm against the
       matching decode routine.
     * anything else: tuple of (raw bytes, shape, dtype).

     :param numpy_data: numpy array.
     :return: Tuple of bytes, shape, and dtype (bare bytes for ``'zfp'``).
     """
     codec = self.compression_type
     if codec == 'zfp':
         return zfpy.compress_numpy(numpy_data)

     payload = numpy_data.tobytes()
     if codec == 'lz4':
         payload = lz4.frame.compress(payload)
     return payload, numpy_data.shape, numpy_data.dtype
Exemplo n.º 6
0
    def test_TS_01(self):
        """Round-trip the TS variable through zfp and print error metrics.

        The variable is compressed with an absolute tolerance of 0.01,
        decompressed again and compared with the original via ErrorMetrics.
        Nothing is asserted; the metrics are only printed.
        """
        import xarray as xr
        import zfpy

        dataset = xr.open_dataset('../data/orig.TS.100days.nc')
        original = dataset.TS.values
        compressed = zfpy.compress_numpy(original, tolerance=0.01)
        restored = zfpy.decompress_numpy(compressed)

        metrics = ErrorMetrics(observed=original, modelled=restored)
        print("mean squared error: ", metrics.mean_squared_error)

        # first call's result is discarded, exactly as before
        metrics.get_all_metrics()
        summary = metrics.get_all_metrics(
            exclude={"error", "squared_error", "absolute_error"}
        )
        print(summary)
Exemplo n.º 7
0
def zfp_encode(data, level=None, mode=None, execution=None, header=True,
               out=None):
    """Compress ``data`` with ``zfp.compress_numpy`` in the requested mode.

    ``mode`` selects which keyword is passed on, with ``level`` as its value
    (``-1`` when ``level`` is None):

    * None / ``zfp.mode_null`` / ``'R'`` / ``'reversible'``: no parameter.
    * ``zfp.mode_fixed_precision`` / ``'p'`` / ``'precision'``: ``precision``.
    * ``zfp.mode_fixed_rate`` / ``'r'`` / ``'rate'``: ``rate``.
    * ``zfp.mode_fixed_accuracy`` / ``'a'`` / ``'accuracy'``: ``tolerance``.
    * ``zfp.mode_expert`` / ``'c'`` / ``'expert'``: ``level`` is unpacked into
      four values and NotImplementedError is raised.

    Any other ``mode`` is compressed without a parameter as well.
    ``header`` is forwarded as ``write_header``; ``execution`` and ``out``
    are accepted but not used.
    """
    options = {'write_header': header}
    if mode in (None, zfp.mode_null, 'R', 'reversible'):  # zfp.mode_reversible
        return zfp.compress_numpy(data, **options)

    param = -1 if level is None else level
    if mode in (zfp.mode_fixed_precision, 'p', 'precision'):
        options['precision'] = param
    elif mode in (zfp.mode_fixed_rate, 'r', 'rate'):
        options['rate'] = param
    elif mode in (zfp.mode_fixed_accuracy, 'a', 'accuracy'):
        options['tolerance'] = param
    elif mode in (zfp.mode_expert, 'c', 'expert'):
        # the unpacking checks that level holds exactly four entries
        minbits, maxbits, maxprec, minexp = level
        raise NotImplementedError()
    return zfp.compress_numpy(data, **options)
def mat_send(sendSocket, frame, logger):
    """Compress ``frame`` with zfp and send it, length-prefixed, on ``sendSocket``.

    The packet is a 4-byte big-endian unsigned length followed by the
    compressed bytes.

    Raises:
        TypeError: if ``frame`` is not a numpy array.
        BrokenPipeError: re-raised after logging when the connection breaks.
    """
    if not isinstance(frame, np.ndarray):
        raise TypeError("input frame is not a valid numpy array")

    payload = zfpy.compress_numpy(frame)
    packet = struct.pack('>I', len(payload)) + payload

    try:
        sendSocket.sendall(packet)
    except BrokenPipeError:
        logger.error("connection broken")
        raise
    else:
        logger.debug("frame sent")
Exemplo n.º 9
0
    def test_advanced_decompression_nonsquare(self):
        """Header-less round trip of random arrays whose axes all differ in length.

        For 1 to 4 dimensions, an array of shape (2,), (2, 3), ... is
        compressed without a header and decoded through ``zfpy._decompress``
        with the type and shape supplied by hand; the result must equal the
        input exactly.
        """
        for ndim in range(1, 5):
            extents = range(2, 2 + ndim)
            source = np.random.rand(*extents)

            target = np.empty_like(source)
            stream = zfpy.compress_numpy(source, write_header=False)
            zfpy._decompress(
                stream,
                zfpy.dtype_to_ztype(source.dtype),
                source.shape,
                out=target,
            )
            self.assertIsNone(np.testing.assert_array_equal(target, source))
Exemplo n.º 10
0
def mat_send_comp(sendSocket, frame, logger):
    """Compress ``frame`` with zfp, log the size change and send it.

    The packet is a 4-byte big-endian unsigned length followed by the
    compressed bytes. The logged sizes are the raw array size and the
    compressed payload size, both in bytes.

    Args:
        sendSocket (socket): connection the packet is written to.
        frame (ndarray): matrix to send.
        logger: object providing ``info``, ``error`` and ``debug``.

    Raises:
        TypeError: if ``frame`` is not a numpy array.
        BrokenPipeError: re-raised after logging when the connection breaks.
    """
    if not isinstance(frame, np.ndarray):
        raise TypeError("input frame is not a valid numpy array")

    sizeBefore = frame.nbytes

    compFrame = zfpy.compress_numpy(frame)

    # len() is the payload size; sys.getsizeof() would also count the
    # interpreter's per-object overhead and overstate the compressed size
    sizeAfter = len(compFrame)
    logger.info("Bytes: " + str(sizeBefore) + ' ---> ' + str(sizeAfter))

    data = struct.pack('>I', sizeAfter) + compFrame

    try:
        sendSocket.sendall(data)
    except BrokenPipeError:
        logger.error("connection broken")
        raise

    logger.debug("frame sent")
Exemplo n.º 11
0
    def test_advanced_decompression_checksum(self):
        """Checksum test for header-less decompression in fixed-accuracy mode.

        A 2-D float array from the test utilities is compressed without a
        header, decoded through ``zfpy._decompress`` with type, shape and
        tolerance supplied by hand, and the hash of the result is compared
        with the reference checksum from the test utilities.
        """
        ztype = zfpy.type_float
        mode = zfpy.mode_fixed_accuracy
        param_index = 1
        source = test_utils.getRandNumpyArray(2, ztype)

        tolerance = test_utils.computeParameterValue(mode, param_index)
        codec_kwargs = {"tolerance": tolerance}

        stream = zfpy.compress_numpy(
            source,
            write_header=False,
            **codec_kwargs
        )

        # Decompression using the "advanced" interface which enforces no header,
        # and the user must provide all the metadata
        restored = np.empty_like(source)
        zfpy._decompress(
            stream,
            ztype,
            source.shape,
            out=restored,
            **codec_kwargs
        )

        # the checksum lookup takes a 4-entry shape, padded with zeros
        padded_dims = restored.shape + (0,) * (4 - restored.ndim)
        expected = test_utils.getChecksumDecompArray(
            padded_dims,
            ztype,
            mode,
            param_index
        )
        observed = test_utils.hashNumpyArray(restored)
        self.assertEqual(expected, observed)
Exemplo n.º 12
0
def run(
    bulk_hs,
    surface_hs,
    surface_tile,
    bulk_rsi_direction,
    mp_grid,
    energy_linspace,
    energy_imag,
    out_dir,
    k_axes,
    surf_nsc,
    bulk_nsc,
    matrix_fmt,
    zfp_tolerance,
    example_submit
):
    """Compute real-space self-energies on an energy grid and save them to ``out_dir``.

    The bulk and surface Hamiltonians are read, a recursive and a real-space
    self-energy object are built from them, the geometries and the energy
    grid are written out, and then each process computes and stores the
    self-energy matrices for its share of the energy points.

    Relies on the names ``rank``, ``comm``, ``mprint`` and ``pick_a_rank``
    that are defined outside this function.
    NOTE(review): presumably ``comm``/``rank`` are the MPI communicator and
    this process's index in it, and ``pick_a_rank`` selects one rank to do a
    write - confirm against their definitions.

    Args:
        bulk_hs: Source passed to ``si.get_sile`` to read the bulk Hamiltonian.
        surface_hs: Source passed to ``si.get_sile`` to read the surface Hamiltonian.
        surface_tile: Passed as ``unfold`` to ``si.physics.RealSpaceSI``.
        bulk_rsi_direction: Second argument of ``si.physics.RecursiveSI``.
        mp_grid: Values assigned to the ``k_axes`` entries of the
            Monkhorst-Pack grid; all other entries stay 1.
        energy_linspace: Sequence whose first two entries are the start and
            stop of the energy grid and whose third entry is the number of points.
        energy_imag: Imaginary part added to every energy point.
        out_dir: Output directory; combined with file names through ``/``.
        k_axes: Axes passed to ``si.physics.RealSpaceSI``, also used to index
            the Monkhorst-Pack grid.
        surf_nsc: If not None, applied to the surface Hamiltonian with ``set_nsc``.
        bulk_nsc: If not None, applied to the bulk Hamiltonian with ``set_nsc``.
        matrix_fmt: ``"npz"`` saves each matrix with ``np.savez_compressed``;
            ``"zfp"`` saves real and imaginary parts as separate zfp streams.
            Any other value saves nothing.
        zfp_tolerance: Tolerance passed to ``zfpy.compress_numpy`` for ``"zfp"``.
        example_submit: Not used in this function.
    """
    mprint("Reading hamiltonians")
    He = si.get_sile(bulk_hs).read_hamiltonian()
    Hs = si.get_sile(surface_hs).read_hamiltonian()

    # optional override of the number of supercells of either Hamiltonian
    if bulk_nsc is not None:
        He.set_nsc(bulk_nsc)
    if surf_nsc is not None:
        Hs.set_nsc(surf_nsc)

    # only the process whose rank matches the picked one writes the files
    with pick_a_rank() as r:
        mprint(f"Writing hamiltonians ({r})", printer=r)
        if rank == r:
            He.write(out_dir / "bulk_hamiltonian.nc")
            Hs.write(out_dir / "surface_hamiltonian.nc")

    rsi = si.physics.RecursiveSI(He, bulk_rsi_direction)
    rssi = si.physics.RealSpaceSI(rsi, Hs, k_axes, unfold=surface_tile)

    coupling_geom, se_indices = rssi.real_space_coupling(True)
    with pick_a_rank() as r:
        mprint(f"Saving coupling geometry ({r})", printer=r)
        if rank == r:
            coupling_geom.write(out_dir / "coupling_geometry.nc")
            np.save(out_dir / "coupling_geometry_indices", se_indices)

    parenths = rssi.real_space_parent()
    # reorder the parent so that the se_indices atoms come first, followed by
    # all remaining atoms in their original order
    new_order = np.concatenate((se_indices, np.delete(np.arange(parenths.na), se_indices)))
    parenths = parenths.sub(new_order)
    with pick_a_rank() as r:
        mprint(f"Saving parent geometry ({r})", printer=r)
        if rank == r:
            parenths.geometry.write(out_dir / f"full_geometry.fdf")
            parenths.write(out_dir / f"full_geometry.nc")
    # the parent is not needed past this point
    del parenths

    # Monkhorst-Pack grid: 1 along every axis except k_axes, which get mp_grid
    mp = np.array([1, 1, 1], dtype=int)
    mp[k_axes] = mp_grid
    rssi.set_options(bz=si.MonkhorstPack(coupling_geom, mp))

    # real energy grid with spacing dE, then shifted by the imaginary part
    E, dE = np.linspace(*energy_linspace[:2], int(energy_linspace[2]), retstep=True)
    nE = len(E)
    E = E + 1j*energy_imag
    with pick_a_rank() as r:
        mprint(f"Saving energy grid ({r})", printer=r)
        if rank == r:
            np.save(out_dir / "energy_grid", E)
            si.io.TableSile(out_dir / "energy_grid.table", "w").write_data(E.real, E.imag, np.full(E.shape, dE))

    # each process takes a contiguous chunk of at most nperrank energy
    # indices; the chunk is clipped at nE, so it can be shorter or empty
    # NOTE(review): with an empty energy grid nperrank is 0 and the modulo
    # below would raise ZeroDivisionError - confirm nE > 0 is guaranteed
    nperrank = ceil(nE / comm.size)
    local_eidx = np.arange(rank * nperrank, min((rank + 1) * nperrank, nE))
    mprint("Energy grid distribution:", nperrank, "energy points per processor.")
    if (nE % nperrank):
        mprint(f"One process only has {nE % nperrank} energy points.")

    mprint(f"To assess processing progress, use `echo $(( 100 * $(find {out_dir} -type f -name 'SE_E*.npz' | wc -l) / {nE} ))%`")
    mprint(f"Note that these files due to the parallelism are probably created in bunches of {comm.size} and each bunch may take long to finish.")
    for ie, e in zip(local_eidx, E[local_eidx]):
        se = rssi.self_energy(e, bulk=True, coupling=True)
        mprint(f"SE{ie:>03d} calculated", printer=rank)
        if matrix_fmt == "npz":
            np.savez_compressed(out_dir / f"SE_E{ie:>03d}.npz", se)
        elif matrix_fmt == "zfp":
            # real and imaginary parts are compressed and stored separately
            bs = zfpy.compress_numpy(se.real, tolerance=zfp_tolerance)
            (out_dir / f"SE_E{ie:>03d}_REAL.zfp").write_bytes(bs)
            bs = zfpy.compress_numpy(se.imag, tolerance=zfp_tolerance)
            (out_dir / f"SE_E{ie:>03d}_IMAG.zfp").write_bytes(bs)
            del bs
        # drop the matrix and collect garbage before the next energy point
        del se
        mprint(f"SE{ie:>03d} saved", printer=rank)
        gc.collect()
    mprint(f"MPI-rank {rank} done.", printer=rank)
    # wait for every process before printing the final message
    comm.Barrier()
    mprint((
        f"All done! Use `easySE gfdir2gf {out_dir}` to convert the parallel results"
        " into the tbtgf needed for Siesta/tbtrans."
    ))
Exemplo n.º 13
0
 def lossless_round_trip(self, orig_array):
     """Assert that ``orig_array`` survives a zfp round trip unchanged.

     The array is compressed with a header and no compression parameter,
     decompressed through the public interface and compared element-wise
     with the input.
     """
     stream = zfpy.compress_numpy(orig_array, write_header=True)
     restored = zfpy.decompress_numpy(stream)
     comparison = np.testing.assert_array_equal(restored, orig_array)
     self.assertIsNone(comparison)
Exemplo n.º 14
0
    def test_utils(self):
        """Check compression and decompression against the reference checksums.

        For every dimensionality (1-4), scalar type, stride layout,
        compression mode and parameter index, the hash of the compressed
        stream and of the decompressed array must match the checksum that
        the test utilities report for that combination.
        """
        def padded_dims(array):
            # the checksum lookups take a 4-entry shape, padded with zeros
            return array.shape + (0,) * (4 - array.ndim)

        scalar_types = [
            (zfpy.type_float,  "float"),
            (zfpy.type_double, "double"),
            (zfpy.type_int32,  "int32"),
            (zfpy.type_int64,  "int64"),
        ]
        stride_layouts = [
            ("as_is", test_utils.stride_as_is),
            ("permuted", test_utils.stride_permuted),
            ("interleaved", test_utils.stride_interleaved),
            #("reversed", test_utils.stride_reversed),
        ]
        all_modes = [
            (zfpy.mode_fixed_accuracy, "tolerance"),
            (zfpy.mode_fixed_precision, "precision"),
            (zfpy.mode_fixed_rate, "rate"),
        ]

        for ndims in range(1, 5):
            for ztype, _type_label in scalar_types:
                reference = test_utils.getRandNumpyArray(ndims, ztype)
                expected = test_utils.getChecksumOrigArray(padded_dims(reference), ztype)
                observed = test_utils.hashNumpyArray(reference)
                self.assertEqual(expected, observed)

                # only fixed-rate is supported for integers
                if ztype in [zfpy.type_int32, zfpy.type_int64]:
                    modes = all_modes[-1:]
                else:
                    modes = all_modes

                for _layout_label, layout in stride_layouts:
                    # permuting a 1D array is not supported
                    if layout == test_utils.stride_permuted and ndims == 1:
                        continue
                    strided = test_utils.generateStridedRandomNumpyArray(
                        layout,
                        reference
                    )
                    dims = padded_dims(strided)
                    self.assertTrue(np.equal(reference, strided).all())

                    for param_index in range(3):
                        for mode, keyword in modes:
                            # Compression
                            codec_kwargs = {
                                keyword: test_utils.computeParameterValue(
                                    mode,
                                    param_index
                                ),
                            }

                            stream = zfpy.compress_numpy(
                                strided,
                                write_header=False,
                                **codec_kwargs
                            )
                            expected = test_utils.getChecksumCompArray(
                                dims,
                                ztype,
                                mode,
                                param_index
                            )
                            observed = test_utils.hashCompressedArray(stream)
                            self.assertEqual(expected, observed)

                            # Decompression
                            expected = test_utils.getChecksumDecompArray(
                                dims,
                                ztype,
                                mode,
                                param_index
                            )

                            # Decompression using the "public" interface
                            # requires a header, so re-compress with the header
                            # included in the stream
                            stream = zfpy.compress_numpy(
                                strided,
                                write_header=True,
                                **codec_kwargs
                            )
                            restored = zfpy.decompress_numpy(stream)
                            observed = test_utils.hashNumpyArray(restored)
                            self.assertEqual(expected, observed)