Example No. 1
def test_append_mix_shuffle():
    orig, new, new_size, dcmp = prep_array_for_append()
    # use the typesize from the file
    # deactivate shuffle
    # crank up the clevel to ensure compression happens, otherwise the flags
    # will be screwed later on
    blosc_args = BloscArgs(typesize=None, shuffle=False, clevel=9)
    reset_append_fp(orig, new, new_size, blosc_args=blosc_args)
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    orig.seek(0)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()
    nt.assert_equal(len(dcmp_str), len(new_str * 2))
    nt.assert_equal(dcmp_str, new_str * 2)

    # now get the first and the last chunk and check that the shuffle doesn't
    # match
    bloscpack_header, offsets = reset_read_beginning(orig)[0:4:3]
    orig.seek(offsets[0])
    checksum_impl = CHECKSUMS_LOOKUP[bloscpack_header['checksum']]
    compressed_zero,  blosc_header_zero, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_zero = blosc.decompress(compressed_zero)
    orig.seek(offsets[-1])
    compressed_last,  blosc_header_last, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_last = blosc.decompress(compressed_last)
    # first chunk has shuffle active
    nt.assert_equal(blosc_header_zero['flags'], 1)
    # last chunk doesn't
    nt.assert_equal(blosc_header_last['flags'], 0)
Example No. 3
    def decompress_meta(self, doc_idx):
        dtypes = self.doc_groups[doc_idx][
            'dtypes']  # needed for store from binary stream
        word2char_start = np.frombuffer(
            blosc.decompress(self.doc_groups[doc_idx]['word2char_start']),
            dtypes['word2char_start'])
        word2char_end = np.frombuffer(
            blosc.decompress(self.doc_groups[doc_idx]['word2char_end']),
            dtypes['word2char_end'])
        f2o_start = np.frombuffer(
            blosc.decompress(self.doc_groups[doc_idx]['f2o_start']),
            dtypes['f2o_start'])
        context = blosc.decompress(
            self.doc_groups[doc_idx]['context']).decode('utf-8')
        title = self.doc_groups[doc_idx]['title']  # not compressed

        return {
            'word2char_start': word2char_start,
            'word2char_end': word2char_end,
            'f2o_start': f2o_start,
            'context': context,
            'title': title,
            'offset': -2,
            'scale': 20,
        }
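The reader above implies the layout of each doc group entry: a dtypes map plus blosc-compressed array buffers, a compressed UTF-8 context, and an uncompressed title. The following is a minimal sketch of the matching write side under those assumptions; compress_meta_entry is a hypothetical helper name, not part of the original project.

import blosc
import numpy as np

def compress_meta_entry(word2char_start, word2char_end, f2o_start, context, title):
    # Hypothetical inverse of decompress_meta above: record the dtypes needed to
    # rebuild the arrays, blosc-compress the raw array buffers and the UTF-8
    # encoded context, and leave the title uncompressed.
    return {
        'dtypes': {
            'word2char_start': word2char_start.dtype.str,
            'word2char_end': word2char_end.dtype.str,
            'f2o_start': f2o_start.dtype.str,
        },
        'word2char_start': blosc.compress(np.ascontiguousarray(word2char_start).tobytes(),
                                          typesize=word2char_start.dtype.itemsize),
        'word2char_end': blosc.compress(np.ascontiguousarray(word2char_end).tobytes(),
                                        typesize=word2char_end.dtype.itemsize),
        'f2o_start': blosc.compress(np.ascontiguousarray(f2o_start).tobytes(),
                                    typesize=f2o_start.dtype.itemsize),
        'context': blosc.compress(context.encode('utf-8'), typesize=1),
        'title': title,  # stored as-is, matching the reader
    }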
Example No. 4
def _client_unpack_data(buf, compressor='blosc'):
    """Unpack (on the client side) data packed (on the server side) by _server_pack_data().
    The compressor name passed to _server_pack_data() must also be passed
    to this function."""
    header_len = struct.unpack_from('<H', buf[:2])[0]
    dtype, shape, order = json.loads(
        bytes(buf[2:header_len + 2]).decode('ascii'))
    array_buf = buf[header_len + 2:]
    # NB: If this function exits with an exception involving zero-length slices, please upgrade your pyzmq
    # installation (the issue is known to be fixed in pyzmq 14.6.0, and at the time this comment was written,
    # "pip-3.4 install pyzmq" grabbed 14.7.0).
    if compressor is None:
        data = array_buf
    elif compressor == 'zlib':
        data = zlib.decompress(array_buf)
    elif compressor == 'blosc':
        import blosc
        try:
            # This works as of June 2 (pyblosc git repo commit ID 487fe5531abc38faebd47b92a34991a1489a7ac3)
            data = blosc.decompress(array_buf)
        except TypeError:
            # However, as of Aug 11 2015, the version of pyblosc installed by pip-3.4 does not yet include
            # the fix, so most lab machines will fall through to the following legacy method, which copies
            # to a temporary intermediate buffer
            data = blosc.decompress(bytes(array_buf))
    array = numpy.ndarray(shape, dtype=dtype, order=order, buffer=data)
    array.flags.writeable = True
    return array
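Only the client-side unpacker is shown here; the matching packer would have to emit a 2-byte little-endian header length, an ASCII JSON header of [dtype, shape, order], and then the (optionally compressed) array buffer. Below is a rough sketch under those assumptions, with a made-up name since the real _server_pack_data is not included in this listing.

import json
import struct
import zlib

import blosc

def _server_pack_data_sketch(array, compressor='blosc'):
    # Hypothetical packer mirroring _client_unpack_data: 2-byte little-endian
    # header length, ASCII JSON header [dtype, shape, order], then the array
    # buffer, optionally compressed.
    order = 'F' if array.flags.f_contiguous and not array.flags.c_contiguous else 'C'
    header = json.dumps([array.dtype.str, array.shape, order]).encode('ascii')
    payload = array.tobytes(order=order)
    if compressor == 'zlib':
        payload = zlib.compress(payload)
    elif compressor == 'blosc':
        payload = blosc.compress(payload, typesize=array.dtype.itemsize)
    return struct.pack('<H', len(header)) + header + payload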
Example No. 5
def _client_unpack_data(buf, compressor='blosc'):
    """Unpack (on the client side) data packed (on the server side) by _server_pack_data().
    The compressor name passed to _server_pack_data() must also be passed
    to this function."""
    header_len = struct.unpack_from('<H', buf[:2])[0]
    dtype, shape, order = json.loads(bytes(buf[2:header_len+2]).decode('ascii'))
    array_buf = buf[header_len+2:]
    # NB: If this function exits with an exception involving zero-length slices, please upgrade your pyzmq
    # installation (the issue is known to be fixed in pyzmq 14.6.0, and at the time this comment was written,
    # "pip-3.4 install pyzmq" grabbed 14.7.0).
    if compressor is None:
        data = array_buf
    elif compressor == 'zlib':
        data = zlib.decompress(array_buf)
    elif compressor == 'blosc':
        import blosc
        try:
            # This works as of June 2 (pyblosc git repo commit ID 487fe5531abc38faebd47b92a34991a1489a7ac3)
            data = blosc.decompress(array_buf)
        except TypeError:
            # However, as of Aug 11 2015, the version of pyblosc installed by pip-3.4 does not yet include
            # the fix, so most lab machines will fall through to the following legacy method, which copies
            # to a temporary intermediate buffer
            data = blosc.decompress(bytes(array_buf))
    array = numpy.ndarray(shape, dtype=dtype, order=order, buffer=data)
    array.flags.writeable = True
    return array
Example No. 6
 def __getitem__(self, key):
     if self.meta[key]['split']:
         return np.concatenate(
             list((np.frombuffer(minimizer.decompress(entry),
                                 dtype=self.meta[key]['dtype'])
                   for entry in self.storage[key]))).reshape(
                       self.meta[key]['shape'])
     else:
         return np.frombuffer(minimizer.decompress(self.storage[key]),
                              dtype=self.meta[key]['dtype']).reshape(
                                  self.meta[key]['shape'])
Example No. 7
    def test_decompress_input_types(self):
        import numpy as np
        # assume the expected answer was compressed from bytes
        expected = b'0123456789'
        compressed = blosc.compress(expected, typesize=1)

        # now for all the things that support the buffer interface
        self.assertEqual(expected, blosc.decompress(compressed))
        self.assertEqual(expected, blosc.decompress(memoryview(compressed)))

        self.assertEqual(expected, blosc.decompress(bytearray(compressed)))
        self.assertEqual(expected, blosc.decompress(np.array([compressed])))
Example No. 8
def test_append_mix_shuffle():
    orig, new, new_size, dcmp = prep_array_for_append()
    # use the typesize from the file
    # deactivate shuffle
    # crank up the clevel to ensure compression happens, otherwise the flags
    # will be screwed later on
    blosc_args = BloscArgs(typesize=None, shuffle=False, clevel=9)

    # need to create something that will be compressible even without shuffle,
    # the linspace used in 'new' doesn't work anymore as of python-blosc 1.6.1
    to_append = np.zeros(int(2e6))
    to_append_fp = StringIO()
    to_append_fp.write(to_append.tostring())
    to_append_fp_size = to_append_fp.tell()
    to_append_fp.seek(0)

    # now do the append
    reset_append_fp(orig,
                    to_append_fp,
                    to_append_fp_size,
                    blosc_args=blosc_args)

    # decompress 'orig' so that we can examine it
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    orig.seek(0)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()

    # now sanity check the length and content of the decompressed
    nt.assert_equal(len(dcmp_str), len(new_str) + to_append_fp_size)
    nt.assert_equal(dcmp_str, new_str + to_append.tostring())

    # now get the first and the last chunk and check that the shuffle doesn't
    # match
    bloscpack_header, offsets = reset_read_beginning(orig)[0:4:3]
    orig.seek(offsets[0])
    checksum_impl = CHECKSUMS_LOOKUP[bloscpack_header['checksum']]
    compressed_zero,  blosc_header_zero, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_zero = blosc.decompress(compressed_zero)
    orig.seek(offsets[-1])
    compressed_last,  blosc_header_last, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_last = blosc.decompress(compressed_last)
    # first chunk has shuffle active
    nt.assert_equal(blosc_header_zero['flags'], 1)
    # last chunk doesn't
    nt.assert_equal(blosc_header_last['flags'], 0)
Example No. 9
    def sample(self, size):
        size = min(size, len(self.mem))
        elements = random.sample(self.mem, size)

        elements_decompressed = []
        for i in range(size):
            element_decompressed = []
            element_decompressed.append(np.reshape(np.fromstring(blosc.decompress(elements[i][0]), dtype=np.uint8), tuple(self.shape)))
            element_decompressed.append(elements[i][1])
            element_decompressed.append(elements[i][2])
            element_decompressed.append(np.reshape(np.fromstring(blosc.decompress(elements[i][3]), dtype=np.uint8), tuple(self.shape)))
            element_decompressed.append(elements[i][4])
            elements_decompressed.append(element_decompressed)
        return elements_decompressed
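sample() assumes that fields 0 and 3 of each stored transition hold blosc-compressed uint8 screens while the remaining fields are stored untouched. A hedged sketch of the corresponding write side follows; the helper name is invented for illustration.

import blosc
import numpy as np

def compress_transition(s, a, r, s2, terminal):
    # Hypothetical write-side counterpart to sample(): fields 0 and 3 hold
    # blosc-compressed uint8 screens, the other fields are stored as-is.
    return [blosc.compress(np.ascontiguousarray(s, dtype=np.uint8).tobytes(), typesize=1),
            a,
            r,
            blosc.compress(np.ascontiguousarray(s2, dtype=np.uint8).tobytes(), typesize=1),
            terminal]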
Example No. 10
def test_append_mix_shuffle():
    orig, new, new_size, dcmp = prep_array_for_append()
    # use the typesize from the file
    # deactivate shuffle
    # crank up the clevel to ensure compression happens, otherwise the flags
    # will be screwed later on
    blosc_args = BloscArgs(typesize=None, shuffle=False, clevel=9)

    # need to create something that will be compressible even without shuffle,
    # the linspace used in 'new' doesn't work anymore as of python-blosc 1.6.1
    to_append = np.zeros(int(2e6))
    to_append_fp = StringIO()
    to_append_fp.write(to_append.tostring())
    to_append_fp_size = to_append_fp.tell()
    to_append_fp.seek(0)

    # now do the append
    reset_append_fp(orig, to_append_fp, to_append_fp_size, blosc_args=blosc_args)

    # decompress 'orig' so that we can examine it
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    orig.seek(0)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()

    # now sanity check the length and content of the decompressed
    nt.assert_equal(len(dcmp_str), len(new_str) + to_append_fp_size)
    nt.assert_equal(dcmp_str, new_str + to_append.tostring())

    # now get the first and the last chunk and check that the shuffle doesn't
    # match
    bloscpack_header, offsets = reset_read_beginning(orig)[0:4:3]
    orig.seek(offsets[0])
    checksum_impl = CHECKSUMS_LOOKUP[bloscpack_header['checksum']]
    compressed_zero,  blosc_header_zero, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_zero = blosc.decompress(compressed_zero)
    orig.seek(offsets[-1])
    compressed_last,  blosc_header_last, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_last = blosc.decompress(compressed_last)
    # first chunk has shuffle active
    nt.assert_equal(blosc_header_zero['flags'], 1)
    # last chunk doesn't
    nt.assert_equal(blosc_header_last['flags'], 0)
Example No. 11
    def test_decompress_input_types(self):
        import numpy as np
        # assume the expected answer was compressed from bytes
        expected = b'0123456789'
        compressed = blosc.compress(expected, typesize=1)

        # now for all the things that support the buffer interface
        self.assertEqual(expected, blosc.decompress(compressed))
        if not PY3X:
            # Python 3 no longer has the buffer
            self.assertEqual(expected, blosc.decompress(buffer(compressed)))
        self.assertEqual(expected, blosc.decompress(memoryview(compressed)))

        self.assertEqual(expected, blosc.decompress(bytearray(compressed)))
        self.assertEqual(expected, blosc.decompress(np.array([compressed])))
Example No. 12
def commit_spec_raw_val_from_db_val(db_val: bytes) -> DigestAndUserSpec:
    uncompressed_db_val = blosc.decompress(db_val)
    digest = _hash_func(uncompressed_db_val)
    commit_spec = json.loads(uncompressed_db_val)
    user_spec = CommitUserSpec(**commit_spec)
    res = DigestAndUserSpec(digest=digest, user_spec=user_spec)
    return res
Example No. 13
 def test_bitshuffle_not_multiple(self):
     # Check the fix for #133
     x = numpy.ones(27266, dtype='uint8')
     xx = x.tobytes()
     zxx = blosc.compress(xx, typesize=8, shuffle=blosc.BITSHUFFLE)
     last_xx = blosc.decompress(zxx)[-3:]
     self.assertEqual(last_xx, b'\x01\x01\x01')
Example No. 14
    def push_find_missing_hash_records(self, commit, tmpDB: lmdb.Environment = None):

        if tmpDB is None:
            with tempfile.TemporaryDirectory() as tempD:
                tmpDF = os.path.join(tempD, 'test.lmdb')
                tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
                commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
                c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
                c_hashes = list(set(c_hashs_schemas.keys()))
                tmpDB.close()
        else:
            c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
            c_hashes = list(set(c_hashs_schemas.keys()))

        pb2_func = hangar_service_pb2.FindMissingHashRecordsRequest
        cIter = chunks.missingHashRequestIterator(commit, c_hashes, pb2_func)
        responses = self.stub.PushFindMissingHashRecords(cIter)
        for idx, response in enumerate(responses):
            if idx == 0:
                commit = response.commit
                hBytes, offset = bytearray(response.total_byte_size), 0
            size = len(response.hashs)
            hBytes[offset: offset + size] = response.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        s_missing_hashs = msgpack.unpackb(uncompBytes, raw=False, use_list=False)
        s_mis_hsh_sch = [(s_hsh, c_hashs_schemas[s_hsh]) for s_hsh in s_missing_hashs]
        return s_mis_hsh_sch
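This example, and several of the hangar examples below, repeat the same reassembly loop: the first streamed message carries total_byte_size, each subsequent hashs chunk is copied into a preallocated bytearray, and the result is blosc-decompressed and msgpack-decoded. A standalone sketch of just that pattern; the helper name is hypothetical.

import blosc
import msgpack

def reassemble_hash_stream(responses):
    # Hypothetical helper isolating the pattern used above: preallocate a
    # buffer from the first message, fill it chunk by chunk, then decompress
    # and decode the whole blob in one go.
    hBytes, offset = bytearray(), 0
    for idx, response in enumerate(responses):
        if idx == 0:
            hBytes = bytearray(response.total_byte_size)
        size = len(response.hashs)
        hBytes[offset: offset + size] = response.hashs
        offset += size
    uncompBytes = blosc.decompress(hBytes)
    return msgpack.unpackb(uncompBytes, raw=False, use_list=False)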
Example No. 15
    def load_images(self, index, fmt, dtype=None, **kwargs):
        """Load images from files.

        Parameters
        ----------
        fmt : str
            Format of image files.
        dtype: str
            Cast images to specified dtype.

        Returns
        -------
        batch : ImageBatch
            Batch with uploaded images.
        """
        fname = self.index.images[index]
        if fmt == 'blosc':
            with open(fname, 'rb') as f:
                img = dill.loads(blosc.decompress(f.read()))
        elif fmt == 'jpeg':
            img = imread(fname, mode='L')
        else:
            raise ValueError('Unknown file format')
        if dtype:
            img = img.astype(dtype)
        if img.ndim == 2:
            img = img[:, :, np.newaxis]
        origin = np.array((((0, 0), (0, img.shape[1])),
                           ((img.shape[0], 0), (img.shape[0], img.shape[1]))))
        self.images[index] = img
Example No. 16
def unconvert(values, dtype, compress=None):

    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if dtype == np.object_:
        return np.array(values, dtype=object)

    if not as_is_ext:
        values = values.encode('latin1')

    if compress == 'zlib':
        import zlib
        values = zlib.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    elif compress == 'blosc':
        import blosc
        values = blosc.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    # from a string
    return np.fromstring(values, dtype=dtype)
Example No. 17
def commit_ref_raw_val_from_db_val(commit_db_val: bytes) -> DigestAndDbRefs:
    """Load and decompress a commit ref db_val into python object memory.

    Parameters
    ----------
    commit_db_val : bytes
        Serialized and compressed representation of commit refs.

    Returns
    -------
    DigestAndDbRefs
        `digest` of the unpacked commit refs if desired for verification. `db_kvs`
        Iterable of binary encoded key/value pairs making up the repo state at the
        time of that commit. key/value pairs are already in sorted order.
    """
    uncomp_db_raw = blosc.decompress(commit_db_val)
    # if a commit has nothing in it (completely empty), the return from query == ()
    # the stored data is b'' from which the hash is calculated. We manually set these
    # values as the expected unpacking routine will not work correctly.
    if uncomp_db_raw == b'':
        refsDigest = _hash_func(b'')
        raw_db_kv_list = ()
    else:
        raw_joined_kvs_list = uncomp_db_raw.split(CMT_REC_JOIN_KEY)
        refsDigest = _commit_ref_joined_kv_digest(raw_joined_kvs_list)
        raw_db_kv_list = tuple(map(tuple, map(bytes.split, raw_joined_kvs_list)))

    return DigestAndDbRefs(digest=refsDigest, db_kvs=raw_db_kv_list)
Example No. 18
def __cereal_setstate__(self, state):
    if not cereal:
        raise NotImplementedError(
            "__cereal_setstate__ requires pyrosetta '--serialization' build.")

    self.__init__()
    try:
        if "blosc_cereal_binary_archive" in state and blosc:
            iss = rosetta.std.istringstream(
                blosc.decompress(state["blosc_cereal_binary_archive"]))
        elif "cereal_binary_archive" in state:
            iss = rosetta.std.istringstream(state["cereal_binary_archive"])
        else:
            if "blosc_cereal_binary_archive" in state:
                raise ValueError(
                    "No blosc, unable to load compressed pickle state: %s" %
                    tuple(state.keys()))
            else:
                raise ValueError("Unable to load unknown pickle state: %s" %
                                 tuple(state.keys()))

        self.load(cereal.BinaryInputArchive(iss))
    except Exception:
        logger.exception(
            "Error unpickling ceral archive type: %r"
            " archive_version: %r current_version: %r", type(self),
            state.get("cereal_archive_version", None),
            utility.Version.version())
        raise
Example No. 19
    def rgb(self):
        """retrieve the image a RGB array. Takes 13s"""
        if self._rgb is None:
            if colors is None:
                YUV = self.yuv.astype(numpy.int16)
            with self.sem:
                if self._rgb is None:
                    if colors:
                        resolution = self.camera_meta.get(
                            "resolution", (640, 480))
                        data = self.data if blosc is None else blosc.decompress(
                            self.data)
                        self._rgb = colors.yuv420_to_rgb16(data, resolution)
                    else:
                        YUV[:, :, 0] = YUV[:, :, 0] - 16  # Offset Y by 16
                        YUV[:, :, 1:] = YUV[:, :, 1:] - 128  # Offset UV by 128
                        # Calculate the dot product with the matrix to produce RGB output,
                        # clamp the results to byte range and convert to bytes
                        rgb = (YUV.dot(self.YUV2RGB) * 257.0).clip(
                            0, 65535).astype(numpy.uint16)
                        if self.dLUT is None:
                            self.cLUT, self.dLUT = calc_gamma()
                        self._rgb = self.dLUT.take(rgb)

        return self._rgb
Example No. 20
 def decode(self, data):
     if not data:
         return {'input': ([], {})}
     return unpackb(blosc.decompress(bytes(data)),
                    object_hook=self.deserialize_obj,
                    use_list=True,
                    raw=False)
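decode() expects a blosc-compressed msgpack payload, so the matching encode method on the same class would presumably serialize with msgpack and compress the result; serialize_obj is assumed to mirror the deserialize_obj hook and is not shown in this listing. A minimal sketch:

import blosc
from msgpack import packb

def encode(self, data):
    # Hypothetical counterpart to decode() above, as it might appear on the
    # same class: msgpack-serialize then blosc-compress the byte stream.
    return blosc.compress(packb(data, default=self.serialize_obj, use_bin_type=True),
                          typesize=1)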
Example No. 21
def get_one_batch(lmdb_batch_dir, idx):
    env = lmdb.Environment(lmdb_batch_dir, readonly=True, lock=False)
    with env.begin() as txn:
        buff = bytes(txn.get(str(idx).encode('ascii')))
        ser = blosc.decompress(buff)
        batch = pa.deserialize(ser)
    return batch
Example No. 22
    def FetchFindMissingLabels(self, request_iterator, context):
        """Determine metadata hash digest records existing on the server and not on the client.
      """ ''
        for idx, request in enumerate(request_iterator):
            if idx == 0:
                commit = request.commit
                hBytes, offset = bytearray(request.total_byte_size), 0
            size = len(request.hashs)
            hBytes[offset:offset + size] = request.hashs
            offset += size
        uncompBytes = blosc.decompress(hBytes)
        c_hashset = set(msgpack.unpackb(uncompBytes, raw=False,
                                        use_list=False))

        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            s_hashes = set(queries.RecordQuery(tmpDB).metadata_hashes())
            tmpDB.close()

        c_missing = list(s_hashes.difference(c_hashset))
        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        response_pb = hangar_service_pb2.FindMissingLabelsReply
        cIter = chunks.missingHashIterator(commit, c_missing, err, response_pb)
        yield from cIter
Example No. 23
    def cutout(self, x_rng, y_rng, z_rng, datatype, attempts=5):
        cutout_url_base = "{}/cutout/{}/{}/{}".format(BOSS_VERSION,
                                                      self.meta.collection(),
                                                      self.meta.experiment(),
                                                      self.meta.channel())
        cutout_url = "{}/{}/{}:{}/{}:{}/{}:{}/".format(cutout_url_base,
                                                       self.meta.res(),
                                                       x_rng[0], x_rng[1],
                                                       y_rng[0], y_rng[1],
                                                       z_rng[0], z_rng[1])
        if self.meta.iso():
            cutout_url += '?iso=True'

        for attempt in range(attempts):
            try:
                resp = self.get(cutout_url, {'Accept': 'application/blosc'})
                resp.raise_for_status()
            except Exception:
                if attempt != attempts - 1:
                    time.sleep(2**(attempt + 1))
            else:
                break
        else:
            # we failed all the attempts - deal with the consequences.
            raise ConnectionError(
                'Data from URL {} not fetched.  Status code {}, error {}'.
                format(cutout_url, resp.status_code, resp.reason))

        raw_data = blosc.decompress(resp.content)
        data = np.fromstring(raw_data, dtype=datatype)

        return np.reshape(
            data,
            (z_rng[1] - z_rng[0], y_rng[1] - y_rng[0], x_rng[1] - x_rng[0]),
            order='C')
Example No. 24
    def PushLabel(self, request, context):
        """Add a metadata key/value pair to the server with a particular digest.

        Like data tensors, the cryptographic hash of each value is verified
        before the data is actually placed on the server file system.
        """
        req_digest = request.rec.digest

        uncompBlob = blosc.decompress(request.blob)
        received_hash = hashlib.blake2b(uncompBlob, digest_size=20).hexdigest()
        if received_hash != req_digest:
            msg = f'HASH MANGLED: received_hash: {received_hash} != digest: {req_digest}'
            context.set_details(msg)
            context.set_code(grpc.StatusCode.DATA_LOSS)
            err = hangar_service_pb2.ErrorProto(code=15, message=msg)
            reply = hangar_service_pb2.PushLabelReply(error=err)
            return reply

        digest = self.CW.label(received_hash, uncompBlob)
        if not digest:
            msg = f'HASH ALREADY EXISTS: {req_digest}'
            context.set_code(grpc.StatusCode.ALREADY_EXISTS)
            context.set_details(msg)
            err = hangar_service_pb2.ErrorProto(code=6, message=msg)
        else:
            err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        reply = hangar_service_pb2.PushLabelReply(error=err)
        return reply
Example No. 25
    def push_find_missing_labels(self, commit, tmpDB: lmdb.Environment = None):

        if tmpDB is None:
            with tempfile.TemporaryDirectory() as tempD:
                tmpDF = os.path.join(tempD, 'test.lmdb')
                tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
                commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
                c_hashset = set(queries.RecordQuery(tmpDB).metadata_hashes())
                c_hashes = list(c_hashset)
                tmpDB.close()
        else:
            c_hashset = set(queries.RecordQuery(tmpDB).metadata_hashes())
            c_hashes = list(c_hashset)

        c_hashs_raw = [
            chunks.serialize_ident(digest, '') for digest in c_hashes
        ]
        raw_pack = chunks.serialize_record_pack(c_hashs_raw)
        pb2_func = hangar_service_pb2.FindMissingLabelsRequest
        cIter = chunks.missingHashRequestIterator(commit, raw_pack, pb2_func)
        responses = self.stub.PushFindMissingLabels(cIter)
        for idx, response in enumerate(responses):
            if idx == 0:
                commit = response.commit
                hBytes, offset = bytearray(response.total_byte_size), 0
            size = len(response.hashs)
            hBytes[offset:offset + size] = response.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        s_missing_raw = chunks.deserialize_record_pack(uncompBytes)
        s_mis_hsh = [
            chunks.deserialize_ident(raw).digest for raw in s_missing_raw
        ]
        return s_mis_hsh
Example No. 26
def get_similar_images_from_category(image, category, num=-1, group='web'):
    result = red.hgetall('archive:{}:category:{}'.format(group, category))
    
    if len(result) < 2:
        return []
    
    Y = []
    X = []
    name_Y = []

    for k in result:
        h_s_unpacked = blosc.decompress(result[k])
        states = np.fromstring(h_s_unpacked, dtype=np.float32).reshape(2048)
        if k != image :
            Y.append(states)
            name_Y.append(k)
        else:
            X.append(states)

    Y = np.array(Y)
    X = np.array(X)

    D = cosine_similarity(X, Y)
    sort_indices = np.argsort(D[0])[::-1]

    return [ (unicode(name_Y[x], 'utf-8'), D[0][x]) for x in sort_indices[:10]]
Example No. 28
    def fetch_find_missing_labels(self, commit):
        c_hash_keys = hashs.HashQuery(
            self.env.labelenv).list_all_hash_keys_db()
        c_hashset = set(map(parsing.hash_meta_raw_key_from_db_key,
                            c_hash_keys))
        c_hashs_raw = [
            chunks.serialize_ident(digest, '') for digest in c_hashset
        ]
        raw_pack = chunks.serialize_record_pack(c_hashs_raw)

        pb2_func = hangar_service_pb2.FindMissingLabelsRequest
        cIter = chunks.missingHashRequestIterator(commit, raw_pack, pb2_func)
        responses = self.stub.FetchFindMissingLabels(cIter)
        for idx, response in enumerate(responses):
            if idx == 0:
                commit = response.commit
                hBytes, offset = bytearray(response.total_byte_size), 0
            size = len(response.hashs)
            hBytes[offset:offset + size] = response.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        s_missing_raw = chunks.deserialize_record_pack(uncompBytes)
        s_mis_hsh = [
            chunks.deserialize_ident(raw).digest for raw in s_missing_raw
        ]
        return s_mis_hsh
Example No. 29
    def FetchFindMissingHashRecords(self, request_iterator, context):
        """Determine data tensor hash records existing on the server and not on the client.
        """
        for idx, request in enumerate(request_iterator):
            if idx == 0:
                commit = request.commit
                hBytes, offset = bytearray(request.total_byte_size), 0
            size = len(request.hashs)
            hBytes[offset:offset + size] = request.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        c_hashs_raw = chunks.deserialize_record_pack(uncompBytes)
        c_hashset = set(
            [chunks.deserialize_ident(raw).digest for raw in c_hashs_raw])

        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            s_hashes_schemas = queries.RecordQuery(
                tmpDB).data_hash_to_schema_hash()
            s_hashes = set(s_hashes_schemas.keys())
            tmpDB.close()

        c_missing = list(s_hashes.difference(c_hashset))
        c_hash_schemas_raw = [
            chunks.serialize_ident(c_mis, s_hashes_schemas[c_mis])
            for c_mis in c_missing
        ]
        raw_pack = chunks.serialize_record_pack(c_hash_schemas_raw)
        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        response_pb = hangar_service_pb2.FindMissingHashRecordsReply
        cIter = chunks.missingHashIterator(commit, raw_pack, err, response_pb)
        yield from cIter
Example No. 30
def json_numpy_obj_hook(dct):
    """Decodes a previously encoded numpy ndarray with proper shape and dtype.
    And decompresses the data with blosc

    :param dct: (dict) json encoded ndarray
    :return: (ndarray) if input was an encoded ndarray
    """
    if isinstance(dct, dict) and '__ndarray__' in dct:
        array = dct['__ndarray__']
        if sys.version_info >= (
                3, 0
        ):  # http://stackoverflow.com/questions/24369666/typeerror-b1-is-not-json-serializable
            array = array.encode('utf-8')
        data = base64.b64decode(array)
        if has_blosc:
            data = blosc.decompress(data)

        try:
            dtype = np.dtype(ast.literal_eval(dct['dtype']))
        except ValueError:  # If the array is not a recarray
            dtype = dct['dtype']

        return np.frombuffer(data, dtype).reshape(dct['shape'])

    return dct
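json_numpy_obj_hook decodes a dict carrying '__ndarray__' (base64 of the blosc-compressed buffer), 'dtype' and 'shape'. Below is a hedged sketch of an encoder producing that layout; the function name is made up and is not the project's own encoder.

import base64

import blosc
import numpy as np

def json_numpy_obj_encode(arr, clevel=5):
    # Hypothetical encoder matching json_numpy_obj_hook: blosc-compress the raw
    # buffer, base64-encode it so it survives JSON, and record dtype and shape.
    data = blosc.compress(np.ascontiguousarray(arr).tobytes(),
                          typesize=arr.dtype.itemsize, clevel=clevel)
    return {
        '__ndarray__': base64.b64encode(data).decode('ascii'),
        'dtype': str(arr.dtype),
        'shape': arr.shape,
    }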
Example No. 32
    def test_decompress_releasegil(self):
        import numpy as np
        # assume the expected answer was compressed from bytes
        blosc.set_releasegil(True)
        expected = b'0123456789'
        compressed = blosc.compress(expected, typesize=1)

        # now for all the things that support the buffer interface
        if not PY3X:
            # Python 3 no longer has the buffer
            self.assertEqual(expected, blosc.decompress(buffer(compressed)))
        self.assertEqual(expected, blosc.decompress(memoryview(compressed)))

        self.assertEqual(expected, blosc.decompress(bytearray(compressed)))
        self.assertEqual(expected, blosc.decompress(np.array([compressed])))
        blosc.set_releasegil(False)
Example No. 33
    def PushFindMissingHashRecords(self, request_iterator, context):
        """Determine data tensor hash records existing on the client and not on the server.
        """
        for idx, request in enumerate(request_iterator):
            if idx == 0:
                commit = request.commit
                hBytes, offset = bytearray(request.total_byte_size), 0
            size = len(request.hashs)
            hBytes[offset:offset + size] = request.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        c_hashs_raw = chunks.deserialize_record_pack(uncompBytes)
        c_hashset = set(
            [chunks.deserialize_ident(raw).digest for raw in c_hashs_raw])
        s_hashset = set(
            hashs.HashQuery(self.env.hashenv).list_all_hash_keys_raw())
        s_missing = c_hashset.difference(s_hashset)
        s_hashs_raw = [
            chunks.serialize_ident(s_mis, '') for s_mis in s_missing
        ]
        raw_pack = chunks.serialize_record_pack(s_hashs_raw)

        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        response_pb = hangar_service_pb2.FindMissingHashRecordsReply
        cIter = chunks.missingHashIterator(commit, raw_pack, err, response_pb)
        yield from cIter
Example No. 34
    def push_find_missing_hash_records(self, commit):

        LMDB_CONFIG = config.get('hangar.lmdb')
        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **LMDB_CONFIG)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            s_hashset = set(queries.RecordQuery(tmpDB).data_hashes())
            s_hashes = list(s_hashset)
            tmpDB.close()

        pb2_func = hangar_service_pb2.FindMissingHashRecordsRequest
        cIter = chunks.missingHashRequestIterator(commit, s_hashes, pb2_func)
        responses = self.stub.PushFindMissingHashRecords(cIter)
        for idx, response in enumerate(responses):
            if idx == 0:
                commit = response.commit
                hBytes, offset = bytearray(response.total_byte_size), 0
            size = len(response.hashs)
            hBytes[offset:offset + size] = response.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        missing_hashs = msgpack.unpackb(uncompBytes, raw=False, use_list=False)
        return missing_hashs
Example No. 35
    def _load_data(self, i, src, dst, dtype=None, **kwargs):
        """Load arrays with observational data from various formats."""
        path = self.index.iloc[i, self.index.columns.get_loc(src)]
        fmt = Path(path).suffix.lower()[1:]
        if fmt == 'blosc':
            with open(path, 'rb') as f:
                data = dill.loads(blosc.decompress(f.read()))
        elif fmt == 'npz':
            f = np.load(path)
            keys = list(f.keys())
            if len(keys) != 1:
                raise ValueError('Expected single key, found {}.'.format(
                    len(keys)))
            data = f[keys[0]]
        elif fmt == 'abp':
            data = load_abp_mask(path, **kwargs)
        elif fmt in ['fts', 'fits']:
            data = load_fits(path, **kwargs)
        else:
            data = imread(path, **kwargs)

        if dtype:
            data = data.astype(dtype)

        self.data[dst][i] = data
        return self
Example No. 36
def compress_and_store(
    hd5: h5py.File,
    data: np.ndarray,
    hd5_path: str,
):
    """Support function that takes arbitrary input data in the form of a Numpy array
    and compresses, stores, and checksums the data in an HDF5 file.

    Args:
        hd5 (h5py.File): Target HDF5-file handle.
        data (np.ndarray): Data to be compressed and saved.
        hd5_path (str): HDF5 dataset path for the stored data.
    """
    data = data.copy(order='C')  # Required for xxhash
    compressed_data = blosc.compress(data.tobytes(),
                                     typesize=2,
                                     cname='zstd',
                                     clevel=9)
    hash_uncompressed = xxhash.xxh128_digest(data)
    hash_compressed = xxhash.xxh128_digest(compressed_data)
    decompressed = np.frombuffer(blosc.decompress(compressed_data),
                                 dtype=np.uint16).reshape(data.shape)
    assert (xxhash.xxh128_digest(decompressed) == hash_uncompressed)
    dset = hd5.create_dataset(hd5_path, data=np.void(compressed_data))
    # Store meta data:
    # 1) Shape of the original tensor
    # 2) Hash of the compressed data
    # 3) Hash of the uncompressed data
    dset.attrs['shape'] = data.shape
    dset.attrs['hash_compressed'] = np.void(hash_compressed)
    dset.attrs['hash_uncompressed'] = np.void(hash_uncompressed)
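compress_and_store writes the blosc blob plus the original shape and both xxhash digests as attributes, which is enough to reconstruct and verify the tensor on read. A sketch of a possible reader follows, assuming the same uint16 dtype the writer hard-codes; the function name is hypothetical.

import blosc
import h5py
import numpy as np
import xxhash

def load_and_verify(hd5: h5py.File, hd5_path: str) -> np.ndarray:
    # Hypothetical reader for data written by compress_and_store: fetch the
    # opaque blob, check the stored digests, decompress and restore the shape
    # recorded in the dataset attributes.
    dset = hd5[hd5_path]
    compressed = dset[()].tobytes()
    assert xxhash.xxh128_digest(compressed) == dset.attrs['hash_compressed'].tobytes()
    data = np.frombuffer(blosc.decompress(compressed),
                         dtype=np.uint16).reshape(tuple(dset.attrs['shape']))
    assert xxhash.xxh128_digest(data) == dset.attrs['hash_uncompressed'].tobytes()
    return data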
Example No. 37
    def fetch_label(self, digest: str) -> Tuple[str, bytes]:
        """get a the raw bytes for a metadata/label digest

        Parameters
        ----------
        digest : str
            digest to request from the server

        Returns
        -------
        Tuple[str, bytes]
            elements indicating [`digest`, `raw record bytes`]

        Raises
        ------
        RuntimeError
            if the received data does not match the requested hash value
        """
        rec = hangar_service_pb2.HashRecord(digest=digest)
        request = hangar_service_pb2.FetchLabelRequest(rec=rec)
        reply = self.stub.FetchLabel(request)

        uncompBlob = blosc.decompress(reply.blob)
        received_hash = hashlib.blake2b(uncompBlob, digest_size=20).hexdigest()
        if received_hash != digest:
            raise RuntimeError(
                f'received_hash: {received_hash} != digest: {digest}')
        return (received_hash, uncompBlob)
Example No. 38
    def test_get_object_bounding_box_single_cuboid(self):
        """ Test getting the bounding box of a object"""

        test_mat = np.ones((128, 128, 16))
        test_mat[0:128, 0:128, 0:16] = 4
        test_mat = test_mat.astype(np.uint64)
        test_mat = test_mat.reshape((16, 128, 128))
        bb = blosc.compress(test_mat, typesize=64)

        # Create request
        factory = APIRequestFactory()
        request = factory.post('/' + version + '/cutout/col1/exp1/bbchan1/0/1536:1664/1536:1664/0:16/', bb,
                               content_type='application/blosc')
        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='bbchan1',
                                    resolution='0', x_range='1536:1664', y_range='1536:1664', z_range='0:16', t_range=None)
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)

        # Create Request to get data you posted
        request = factory.get('/' + version + '/cutout/col1/exp1/bbchan1/0/1536:1664/1536:1664/0:16/',
                              accepts='application/blosc')

        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='bbchan1',
                                    resolution='0', x_range='1536:1664', y_range='1536:1664', z_range='0:16', t_range=None).render()
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        # Decompress
        raw_data = blosc.decompress(response.content)
        data_mat = np.fromstring(raw_data, dtype=np.uint64)
        data_mat = np.reshape(data_mat, (16, 128, 128), order='C')

        # Test for data equality (what you put in is what you got back!)
        np.testing.assert_array_equal(data_mat, test_mat)

        # get the bounding box

        # Create request
        factory = APIRequestFactory()
        request = factory.get('/' + version + '/boundingbox/col1/exp1/bbchan1/0/4')
        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = BoundingBox.as_view()(request, collection='col1', experiment='exp1', channel='bbchan1',
                                         resolution='0', id='4')

        self.assertEqual(response.status_code, status.HTTP_200_OK)
        bb = response.data
        self.assertEqual(bb['t_range'], [0, 1])
        self.assertEqual(bb['x_range'], [1536, 2048])
        self.assertEqual(bb['y_range'], [1536, 2048])
        self.assertEqual(bb['z_range'], [0, 16])
Example No. 39
 def put(self, compressed):
     chunk = blosc.decompress(compressed)
     if self.have_chunks:
         self.chunks[self.i] = chunk
         self.i += 1
     else:
         self.chunks.append(chunk)
     return len(chunk)
Example No. 40
 def getScreens(self):
     if State.useCompression:
         s = []
         for i in range(4):
             s.append(np.reshape(np.fromstring(blosc.decompress(self.screens[i]), dtype=np.uint8), (84, 84, 1)))
     else:
         s = self.screens
     return np.concatenate(s, axis=2)
Example No. 41
def decompress(data, method, *args, **kwds):
    if method == '':
        return data
    _check_method(method)
    
    if method.startswith('blosc-'):
        return blosc.decompress(data)
    else:
        raise ValueError("Unknown compression method '%s'" % method)
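Usage sketch for the dispatcher above, assuming _check_method (not shown here) accepts 'blosc-*' method names; blosc.decompress autodetects the internal codec, so the suffix only records how the data was produced.

import blosc

# '' means the payload was stored uncompressed; any 'blosc-*' method is handed
# straight to blosc.decompress.
payload = blosc.compress(b'x' * 1000, typesize=1, cname='lz4')
assert decompress(payload, 'blosc-lz4') == b'x' * 1000
assert decompress(b'raw bytes', '') == b'raw bytes'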
Example No. 42
def unpickle(filepath):
    arr = []
    with open(filepath, 'rb') as f:
        while True:
            carr = f.read(blosc.MAX_BUFFERSIZE)
            if len(carr) == 0:
                break
            arr.append(blosc.decompress(carr))
    return pickle_.loads(b"".join(arr))
Example No. 43
 def test_all_filters(self):
     s = b'0123456789'*100
     filters = [blosc.NOSHUFFLE, blosc.SHUFFLE]
     # BITSHUFFLE only works properly from 1.8.0 on
     if LooseVersion(blosc.blosclib_version) >= LooseVersion("1.8.0"):
         filters.append(blosc.BITSHUFFLE)
     for filter_ in filters:
         c = blosc.compress(s, typesize=1, shuffle=filter_)
         d = blosc.decompress(c)
         self.assertEqual(s, d)
Example No. 44
    def test_decompress_releasegil(self):
        import numpy as np
        # assume the expected answer was compressed from bytes
        blosc.set_releasegil(True)
        expected = b'0123456789'
        compressed = blosc.compress(expected, typesize=1)

        # now for all the things that support the buffer interface
        if not PY3X:
            # Python 3 no longer has the buffer
            self.assertEqual(expected, blosc.decompress(buffer(compressed)))
        if not PY26:
            # memoryview doesn't exist on Python 2.6
            self.assertEqual(expected,
                             blosc.decompress(memoryview(compressed)))

        self.assertEqual(expected, blosc.decompress(bytearray(compressed)))
        self.assertEqual(expected, blosc.decompress(np.array([compressed])))
        blosc.set_releasegil(False)
Example No. 45
 def read_one(self, n):
     conn = self.engine.connect()
     q = sa.select(columns = [ 'arraybuffer' ], whereclause = 'num = {}'.format(n), from_obj = [self.table ])
     row = conn.execute(q).first()
     arraybuffer = row['arraybuffer']
     if self.compress:
         arraybuffer = blosc.decompress(str(arraybuffer))
     a = np.frombuffer(arraybuffer, dtype  = arr.dtype)
     
     return a
Example No. 46
    def test_channel_uint64_cuboid_aligned_no_offset_no_time_blosc(self):
        """ Test uint64 data, cuboid aligned, no offset, no time samples"""

        test_mat = np.ones((128, 128, 16))
        test_mat = test_mat.astype(np.uint64)
        test_mat = test_mat.reshape((16, 128, 128))
        bb = blosc.compress(test_mat, typesize=64)

        # Create request
        factory = APIRequestFactory()
        request = factory.post('/' + version + '/cutout/col1/exp1/layer1/0/0:128/0:128/0:16/', bb,
                               content_type='application/blosc')
        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                    resolution='0', x_range='0:128', y_range='0:128', z_range='0:16', t_range=None)
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)

        # Create Request to get data you posted
        request = factory.get('/' + version + '/cutout/col1/exp1/layer1/0/0:128/0:128/0:16/',
                              accepts='application/blosc')

        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                    resolution='0', x_range='0:128', y_range='0:128', z_range='0:16', t_range=None).render()
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        # Decompress
        raw_data = blosc.decompress(response.content)
        data_mat = np.fromstring(raw_data, dtype=np.uint64)
        data_mat = np.reshape(data_mat, (16, 128, 128), order='C')

        # Test for data equality (what you put in is what you got back!)
        np.testing.assert_array_equal(data_mat, test_mat)

        # get the bounding box

        # Create request
        factory = APIRequestFactory()
        request = factory.get('/' + version + '/ids/col1/exp1/layer1/0/0:128/0:128/0:16/')
        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = Ids.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                         resolution='0', x_range='0:128', y_range='0:128', z_range='0:16',
                                         t_range=None)

        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(response.data['ids'], ['1'])
Example No. 47
    def __getitem__(self, item):
        session = self.Session()
        t = session.query(UP.raw_record).filter(
            (UP.id == item) | (UP.STRING == item) | (UP.RefSeq == item) | (UP.Uni_name == item)
        ).first()
        session.close()
        if t is None:
            return None
        r = SeqIO.read(StringIO(blosc.decompress(t[0])), 'swiss')

        return r
Example No. 48
def __MRCZImport( f, header, endian='le', fileConvention = "imod", returnHeader = False, n_threads=None ):
    """
    Equivalent to MRCImport, but for compressed data using the blosc library.
    
    The following compressors are supported: 
        'zlib'
        'zstd'
        'lz4' 
    
    Memory mapping is not possible in this case at present.  
    

    """
    if not bloscPresent:
        print( "ioMRC: blosc not present, cannot compress files." )
        return
        
    if n_threads is None:
        blosc.nthreads = blosc.detect_number_of_cores()
    else:
        blosc.nthreads = n_threads
        
    image = np.empty( header['dimensions'], dtype=header['dtype'] )
    
    # We can read MRC2014 files that don't start at 1024 bytes, but not write them 
    # (as they are non-standard and we don't like breaking stuff)
    blosc_chunk_pos = 1024 + header['extendedBytes']
    for J in np.arange(image.shape[0]):
        f.seek( blosc_chunk_pos )
        ( (nbytes, blockSize, ctbytes ), (ver_info) ) = readBloscHeader(f)
        f.seek(blosc_chunk_pos)
        # blosc includes the 16 header bytes in ctbytes
        image[J,:,:] = np.reshape( 
            np.frombuffer( blosc.decompress( f.read( ctbytes ) ), dtype=image.dtype ),
            image.shape[1:] )
            
        blosc_chunk_pos += (ctbytes)
        pass
    
    
    if header['MRCtype'] == 101:
        # Seems the 4-bit is interlaced 
        interlaced_image = image
            
        image = np.empty( np.product(header['dimensions']), dtype=header['dtype'] )
        # Bit-shift and Bit-and to separate decimated pixels
        image[0::2] = np.left_shift(interlaced_image,4) / 15
        image[1::2] = np.right_shift(interlaced_image,4)

    # We don't need to reshape packed data.
    image = np.squeeze( image )
    
    return image, header
Example No. 49
 def fget(self , inst):
     
     if hasattr(inst, self.name+'_array') :
         return getattr(inst, self.name+'_array')
     
     nprow = getattr(inst, 'NumpyArrayTable__'+self.name)
     
     
     #~ print 'fget',self.name,  nprow, inst.id
     
     
     if nprow is None or nprow.shape is None or nprow.dtype is None:
         return None
     
     if nprow.shape =='':
         shape = ()
     else:
         shape = tuple([ int(v) for v in  nprow.shape.split(',') ])
     
     dt = np.dtype(nprow.dtype)
     
     if nprow.compress == 'blosc':
         buf = blosc.decompress(nprow.blob)
     elif nprow.compress == 'zlib':
         buf = zlib.decompress(nprow.blob)
     elif nprow.compress == 'lz4':
         buf = lz4.decompress(nprow.blob)
     elif nprow.compress == 'snappy':
         buf = snappy.decompress(nprow.blob)        
     elif nprow.compress is None:
         buf = nprow.blob
         
         
     if np.prod(shape)==0:
         if len(buf) != 0:
             arr = np.frombuffer( buf , dtype = dt)
         else:
             arr= np.empty( shape, dtype = dt )
     else:
         arr = np.frombuffer( buf , dtype = dt)
         arr.flags.writeable = True
         arr = arr.reshape(shape)
     
     if self.arraytype == pq.Quantity:
         arr = pq.Quantity(arr, units = nprow.units, copy =False)
     
     # next access will be direct
     setattr(inst, self.name+'_array', arr)
     
     #~ delattr(inst, 'NumpyArrayTable__'+self.name)
     
     return arr
Example No. 50
    def recv_loop(self):
        
        np_array = self.sharedmem_stream['shared_array'].to_numpy_array()        
        half_size = np_array.shape[1]/2
        n = self.sharedmem_stream['nb_channel']
        while self.running:
            events = self.recv_socket.poll(50)
            if events ==0:
                time.sleep(.05)
                if time.time()- self.last_packet_time>self.timeout_reconnect:
                    np_array[:]=0
                    self.start(first_start = False)
                continue
            m0,m1 = self.recv_socket.recv_multipart()
            self.last_packet_time = time.time()
            
            abs_pos = msgpack.loads(m0)
            if self.last_pos>abs_pos:
                print 'restart because last not good'
                self.start(first_start = False)
                
                continue
            
            if self.compress is None:
                buf = buffer(m1)
            elif self.compress == 'blosc':
                buf = blosc.decompress(m1)
            
            chunk = np.frombuffer(buf, dtype = np_array.dtype, ).reshape(-1, n).transpose()
            #~ print 'recv', abs_pos, chunk.shape
            
            new = chunk.shape[1]
            head = abs_pos%half_size+half_size
            tail = head - new
            np_array[:,  tail:head] = chunk

            head = abs_pos%half_size+half_size
            tail = head - new
            np_array[:,  tail:head] = chunk
            head2 = abs_pos%half_size
            tail2 = max(head2 - new, 0)
            new2 = head2-tail2
            if new2!=0:
                np_array[:,  tail2:head2] = chunk[:, -new2:]

            self.send_socket.send(msgpack.dumps(abs_pos))
            self.last_pos = abs_pos
Example No. 51
 def read_one(self, n):
     conn = self.engine.connect()
     q = sa.select(columns = [ 'id', 'arrsize' ], whereclause = 'table1.num = {}'.format(n), from_obj = [self.table ])
     row = conn.execute(q).first()
     table1_id = row['id']
     
     q = sa.select(columns = [ 'smallbuffer' ], whereclause = 'table2.table1_id = {}'.format(table1_id),
                                     from_obj = [self.table2 ], order_by = ['table2.buffernum'] )
     
     a = np.empty((row['arrsize'],), dtype = arr.dtype)
     pos = 0
     for smallbuffer, in conn.execute(q):
         arr_chunk= np.frombuffer(blosc.decompress(str(smallbuffer)), dtype  = arr.dtype)
         a[pos:pos+arr_chunk.size] = arr_chunk
         pos += arr_chunk.size
     
     return a
Example No. 52
def unpack_file(fn, encoding='utf8'):
    """ Unpack numpy array from filename

    Supports binary data with bloscpack and text data with msgpack+blosc

    >>> unpack_file('foo.blp')  # doctest: +SKIP
    array([1, 2, 3])

    See also:
        pack_file
    """
    try:
        return bloscpack.unpack_ndarray_file(fn)
    except ValueError:
        with open(fn, 'rb') as f:
            return np.array(msgpack.unpackb(blosc.decompress(f.read()),
                                            encoding=encoding))
Example No. 53
    def test_channel_uint64_filter_ids_not_found(self):
        """ Test filter_cutout by ids not in the region"""

        test_mat = np.ones((128, 128, 4))
        test_mat[0][0][0] = 2
        test_mat[0][0][1] = 3
        test_mat[0][0][2] = 4
        test_mat = test_mat.reshape(4, 128, 128)
        test_mat = test_mat.astype(np.uint64)
        h = test_mat.tobytes()
        bb = blosc.compress(h, typesize=64)

        # Create request
        factory = APIRequestFactory()
        request = factory.post('/' + version + '/cutout/col1/exp1/layer1/0/128:256/256:384/16:20/', bb,
                               content_type='application/blosc')
        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                    resolution='0', x_range='128:256', y_range='256:384', z_range='16:20', t_range=None)

        self.assertEqual(response.status_code, status.HTTP_201_CREATED)

        # Create Request to get data you posted
        request = factory.get('/' + version + '/cutout/col1/exp1/layer1/0/128:256/256:384/16:20/?filter=5,6,7',
                              accepts='application/blosc')

        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                    resolution='0', x_range='128:256', y_range='256:384', z_range='16:20', t_range=None).render()
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        # Decompress
        raw_data = blosc.decompress(response.content)
        data_mat = np.fromstring(raw_data, dtype=np.uint64)
        data_mat = np.reshape(data_mat, (4, 128, 128), order='C')

        # Test for data equality (what you put in is what you got back!)
        np.testing.assert_array_equal(np.unique(data_mat), np.arange(0, 1, dtype=np.uint64))
Example No. 54
def unconvert(values, dtype, compress=None):

    if dtype == np.object_:
        return np.array(values, dtype=object)

    values = values.encode('latin1')

    if compress == 'zlib':
        import zlib
        values = zlib.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    elif compress == 'blosc':
        import blosc
        values = blosc.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    # from a string
    return np.fromstring(values, dtype=dtype)
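
A minimal sketch of the forward direction that this unconvert would round-trip with; the convert name and the compressor switch mirror the decoder above, everything else is an assumption (note that this variant expects every non-object payload, compressed or not, to travel as a latin1-decoded string):

import zlib

import blosc
import numpy as np


def convert(values, compress=None):
    """ Hypothetical encoder matching the unconvert above """
    if values.dtype == np.object_:
        # object arrays travel as plain Python lists
        return values.ravel().tolist()

    if compress == 'zlib':
        return zlib.compress(values.tobytes()).decode('latin1')

    if compress == 'blosc':
        return blosc.compress(values.tobytes(), typesize=values.dtype.itemsize).decode('latin1')

    # uncompressed raw bytes, again shipped as a latin1 string
    return values.tobytes().decode('latin1')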
Exemplo n.º 55
0
    def test_channel_uint16_cuboid_unaligned_offset_time_blosc(self):
        """ Test uint16 data, not cuboid aligned, offset, time samples, blosc interface

        Test Requires >=2GB of memory!
        """

        test_mat = np.random.randint(1, 2**16-1, (3, 17, 300, 500))
        test_mat = test_mat.astype(np.uint16)
        h = test_mat.tobytes()
        bb = blosc.compress(h, typesize=16)

        # Create request
        factory = APIRequestFactory()
        request = factory.post('/' + version + '/cutout/col1/exp1/channel2/0/100:600/450:750/20:37/0:3', bb,
                               content_type='application/blosc')
        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='channel2',
                                    resolution='0', x_range='100:600', y_range='450:750', z_range='20:37', t_range='0:3')
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)

        # Create Request to get data you posted
        request = factory.get('/' + version + '/cutout/col1/exp1/channel2/0/100:600/450:750/20:37/0:3',
                              HTTP_ACCEPT='application/blosc')

        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='channel2',
                                    resolution='0', x_range='100:600', y_range='450:750', z_range='20:37', t_range='0:3').render()
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        # Decompress
        raw_data = blosc.decompress(response.content)
        data_mat = np.fromstring(raw_data, dtype=np.uint16)
        data_mat = np.reshape(data_mat, (3, 17, 300, 500), order='C')

        # Test for data equality (what you put in is what you got back!)
        np.testing.assert_array_equal(data_mat, test_mat)
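
Before reshaping, the decompressed payload can be checked against the expected byte count. A small illustrative helper (names are assumptions) showing the arithmetic these tests use implicitly: for the cutout above, 3 * 17 * 300 * 500 uint16 values come to 15,300,000 bytes uncompressed.

import blosc
import numpy as np


def decompress_to_array(payload, shape, dtype):
    """ Illustrative helper: decompress a blosc payload and reshape it, verifying the size first """
    raw = blosc.decompress(payload)
    expected = int(np.prod(shape)) * np.dtype(dtype).itemsize
    if len(raw) != expected:
        raise ValueError('expected {} bytes, got {}'.format(expected, len(raw)))
    return np.frombuffer(raw, dtype=dtype).reshape(shape)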
Exemplo n.º 56
0
    def unpack_array(self, data, num_time_points=1):
        """Method to uncompress and deserialize the provided data.

        If only a single time point provided,

        Args:
            data (bytes): The array to pack
            num_time_points (int): Number of time samples in the compressed data

        Returns:
            (np.ndarray): The resulting serialized and compressed byte array
        """
        if not self.datatype:
            raise SpdbError("Cube instance must have datatype parameter set to enable deserialization.",
                            ErrorCodes.SERIALIZATION_ERROR)

        raw_data = blosc.decompress(data)
        data_mat = np.fromstring(raw_data, dtype=self.datatype)
        data_mat = np.reshape(data_mat, (num_time_points, self.z_dim, self.y_dim, self.x_dim), order='C')

        return data_mat
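
A sketch of the inverse operation, serializing and compressing a cube in the same (t, z, y, x) layout that unpack_array expects; the method name and the typesize choice are assumptions, only the layout and the SpdbError guard come from the class above:

    def pack_array(self, data_mat):
        """ Hypothetical inverse of unpack_array: serialize and blosc-compress a cube """
        if not self.datatype:
            raise SpdbError("Cube instance must have datatype parameter set to enable serialization.",
                            ErrorCodes.SERIALIZATION_ERROR)

        # C-order bytes so unpack_array's reshape(..., order='C') restores the layout
        raw_data = np.ascontiguousarray(data_mat, dtype=self.datatype).tobytes()
        return blosc.compress(raw_data, typesize=np.dtype(self.datatype).itemsize)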
Exemplo n.º 57
0
    def test_channel_uint64_cuboid_aligned_offset_no_time_blosc(self):
        """ Test uint64 data, cuboid aligned, offset, no time samples, blosc interface"""

        test_mat = np.random.randint(1, 256, (4, 128, 128))
        test_mat = test_mat.astype(np.uint64)
        h = test_mat.tobytes()
        bb = blosc.compress(h, typesize=64)

        # Create request
        factory = APIRequestFactory()
        request = factory.post('/' + version + '/cutout/col1/exp1/layer1/0/128:256/256:384/16:20/', bb,
                               content_type='application/blosc')
        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                    resolution='0', x_range='128:256', y_range='256:384', z_range='16:20', t_range=None)

        self.assertEqual(response.status_code, status.HTTP_201_CREATED)

        # Create Request to get data you posted
        request = factory.get('/' + version + '/cutout/col1/exp1/layer1/0/128:256/256:384/16:20/',
                              HTTP_ACCEPT='application/blosc')

        # log in user
        force_authenticate(request, user=self.user)

        # Make request
        response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                    resolution='0', x_range='128:256', y_range='256:384', z_range='16:20', t_range=None).render()
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        # Decompress
        raw_data = blosc.decompress(response.content)
        data_mat = np.fromstring(raw_data, dtype=np.uint64)
        data_mat = np.reshape(data_mat, (4, 128, 128), order='C')

        # Test for data equality (what you put in is what you got back!)
        np.testing.assert_array_equal(data_mat, test_mat)
Exemplo n.º 58
0
def unconvert(values, dtype, compress=None):
    # zlib, blosc (guarded by the module-level _BLOSC availability flag) and
    # numpy as np are assumed to be imported at module level in this excerpt.

    if dtype == np.object_:
        return np.array(values, dtype=object)

    if compress == "zlib":

        values = zlib.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    elif compress == "blosc":

        if not _BLOSC:
            raise Exception("cannot uncompress w/o blosc")

        # decompress
        values = blosc.decompress(values)

        return np.frombuffer(values, dtype=dtype)

    # as a list
    return np.array(values, dtype=dtype)
Exemplo n.º 59
0
def unconvert(values, dtype, compress=None):

    if dtype == np.object_:
        return np.array(values, dtype=object)

    if compress == 'zlib':

        values = zlib.decompress(values)
        return np.frombuffer(values, dtype=dtype)

    elif compress == 'blosc':

        if not _BLOSC:
            raise Exception("cannot uncompress w/o blosc")

        # decompress
        values = blosc.decompress(values)

        return np.frombuffer(values, dtype=dtype)

    # from a string
    return np.fromstring(values.encode('latin1'), dtype=dtype)
Exemplo n.º 60
0
def json_numpy_obj_hook(dct):
    """Decodes a previously encoded numpy ndarray with proper shape and dtype.
    And decompresses the data with blosc

    :param dct: (dict) json encoded ndarray
    :return: (ndarray) if input was an encoded ndarray
    """
    if isinstance(dct, dict) and '__ndarray__' in dct:
        array = dct['__ndarray__']
        if sys.version_info >= (3, 0):  # http://stackoverflow.com/questions/24369666/typeerror-b1-is-not-json-serializable
            array = array.encode('utf-8')
        data = base64.b64decode(array)
        if has_blosc:
            data = blosc.decompress(data)

        try:
            dtype = np.dtype(ast.literal_eval(dct['dtype']))
        except ValueError:  # If the array is not a recarray
            dtype = dct['dtype']

        return np.frombuffer(data, dtype).reshape(dct['shape'])

    return dct
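
A matching encoder sketch, assuming the same has_blosc flag and the __ndarray__/dtype/shape keys that the hook above looks for (the function name and the typesize choice are illustrative):

import base64

import blosc
import numpy as np

has_blosc = True  # assumed module-level flag, mirroring the decoder above


def json_numpy_obj_encoder(obj):
    """Encodes a numpy ndarray into a JSON-serializable dict, blosc-compressing the data.

    Intended as a default= hook for json.dumps; anything else raises TypeError as usual.
    """
    if isinstance(obj, np.ndarray):
        data = np.ascontiguousarray(obj).tobytes()
        if has_blosc:
            data = blosc.compress(data, typesize=obj.dtype.itemsize)
        return {'__ndarray__': base64.b64encode(data).decode('utf-8'),
                'dtype': str(obj.dtype),
                'shape': obj.shape}
    raise TypeError('Object of type {} is not JSON serializable'.format(type(obj)))

Usage would pair the two hooks: json.dumps(payload, default=json_numpy_obj_encoder) on the way out and json.loads(text, object_hook=json_numpy_obj_hook) on the way back.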