示例#1
0
    def _read_bucket(self, doc, column_set, column_dtypes, include_symbol,
                     include_images, columns):
        """Decode one version-3 bucket document into a dict of column arrays.

        :param doc: bucket document, indexed with the module-level field
            constants (VERSION, INDEX, COLUMNS, ROWMASK, DTYPE, DATA, ...).
        :param column_set: set of column names to load; updated in place
            with every column present in the document.
        :param column_dtypes: per-column dtype registry, promoted in place
            via self._set_or_promote_dtype.
        :param include_symbol: when truthy, add a 'SYMBOL' entry repeating
            the document's symbol once per returned row.
        :param include_images: when truthy (and the doc carries an image),
            the result is passed through self._prepend_image.
        :param columns: forwarded unchanged to self._prepend_image;
            semantics not visible in this method.
        :return: dict mapping column name -> values (None for columns
            absent from this document), plus the INDEX entry.
        :raises ArcticException: if the document is not version 3.
        """
        rtn = {}
        if doc[VERSION] != 3:
            raise ArcticException("Unhandled document version: %s" %
                                  doc[VERSION])
        # np.cumsum copies the read-only array created with frombuffer
        # (index values appear delta-encoded; cumsum restores the running
        # totals -- confirm against the writer side).
        rtn[INDEX] = np.cumsum(
            np.frombuffer(lz4_decompress(doc[INDEX]), dtype='uint64'))
        doc_length = len(rtn[INDEX])
        column_set.update(doc[COLUMNS].keys())

        # get the mask for the columns we're about to load
        # (one bit per row, packed 8 rows per byte -- hence the ceil-divide)
        union_mask = np.zeros((doc_length + 7) // 8, dtype='uint8')
        for c in column_set:
            try:
                coldata = doc[COLUMNS][c]
                # the or below will make a copy of this read-only array
                mask = np.frombuffer(lz4_decompress(coldata[ROWMASK]),
                                     dtype='uint8')
                union_mask = union_mask | mask
            except KeyError:
                # column not present anywhere in this bucket
                rtn[c] = None
        union_mask = np.unpackbits(union_mask)[:doc_length].astype('bool')
        rtn_length = np.sum(union_mask)

        # keep only index entries for rows where at least one column has data
        rtn[INDEX] = rtn[INDEX][union_mask]
        if include_symbol:
            rtn['SYMBOL'] = [
                doc[SYMBOL],
            ] * rtn_length

        # Unpack each requested column in turn
        for c in column_set:
            try:
                coldata = doc[COLUMNS][c]
                dtype = np.dtype(coldata[DTYPE])
                # values ends up being copied by pandas before being returned to the user. However, we
                # copy it into a bytearray here for safety.
                values = np.frombuffer(bytearray(lz4_decompress(
                    coldata[DATA])),
                                       dtype=dtype)
                self._set_or_promote_dtype(column_dtypes, c, dtype)
                rtn[c] = self._empty(rtn_length, dtype=column_dtypes[c])
                # unpackbits will make a copy of the read-only array created by frombuffer
                rowmask = np.unpackbits(
                    np.frombuffer(lz4_decompress(coldata[ROWMASK]),
                                  dtype='uint8'))[:doc_length].astype('bool')
                # re-express the column's row mask relative to the union mask
                rowmask = rowmask[union_mask]
                rtn[c][rowmask] = values
            except KeyError:
                rtn[c] = None

        if include_images and doc.get(IMAGE_DOC, {}).get(IMAGE, {}):
            rtn = self._prepend_image(rtn, doc[IMAGE_DOC], rtn_length,
                                      column_dtypes, column_set, columns)
        return rtn
示例#2
0
async def get_master(res_ver, to_path):
    """Fetch, decompress, and save master.mdb for resource version *res_ver*.

    Looks the file up in the remote manifest, downloads the LZ4-packed
    database, writes the decompressed bytes to *to_path*, and stamps the
    file's mtime from the Last-Modified header when one is supplied.

    Returns *to_path* on success, or None when the manifest is unavailable
    or the download fails.
    """
    manifest = await read_manifest(res_ver, "Android", "High", "High")
    if not manifest:
        return None

    cur = manifest.execute("SELECT hash, attr FROM manifests WHERE name = ?",
                           ("master.mdb", ))
    file_hash, attr = cur.fetchone()  # renamed: don't shadow builtin hash()
    manifest.close()

    url = SQLBASEURL.format(file_hash, file_hash[0:2])
    cl = httpclient.AsyncHTTPClient()

    try:
        mashttp = await cl.fetch(url, headers=extra_acquisition_headers())
    except Exception as e:
        print("get_master: unhandled error while getting master:", e)
        return None

    # Rebuild the LZ4 payload: keep bytes 4:8 (presumably the
    # uncompressed-size prefix lz4_decompress expects -- TODO confirm)
    # followed by the compressed stream starting at byte 16.
    buf = mashttp.buffer.read()
    bio = io.BytesIO()
    bio.write(buf[4:8])
    bio.write(buf[16:])
    data = lz4_decompress(bio.getvalue())
    with open(to_path, "wb") as write_db:
        write_db.write(data)

    mdate = mashttp.headers.get("Last-Modified")
    if mdate:
        tt = parsedate_tz(mdate)
        mtime = mktime_tz(tt) if tt else int(time())
    else:
        # BUG FIX: this branch called time.time() while the branch above
        # called time(); only one spelling can be valid in a single module.
        # Normalized on the bare time() call already used above (i.e.
        # `from time import time`).
        mtime = int(time())
    os.utime(to_path, (-1, mtime))
    return to_path
示例#3
0
    def decompress(self, source, cursor, compressedbytes, uncompressedbytes=None):
        """Read *compressedbytes* from *source* at *cursor* and decompress.

        Dispatches on ``self.algo``; the lz4 algorithm additionally needs
        *uncompressedbytes* to be supplied.  Raises ImportError when the
        backing codec module is not installed, NotImplementedError for
        ROOT's legacy algorithm, and ValueError for unknown algorithms.
        """
        algo = self.algo

        if algo == uproot.const.kZLIB:
            from zlib import decompress as zlib_decompress
            compressed = cursor.bytes(source, compressedbytes)
            return zlib_decompress(compressed)

        if algo == uproot.const.kLZMA:
            try:
                from lzma import decompress as lzma_decompress
            except ImportError:
                try:
                    from backports.lzma import decompress as lzma_decompress
                except ImportError:
                    raise ImportError("Install lzma package with:\n    pip install backports.lzma\nor\n    conda install -c conda-forge backports.lzma\n(or just use Python >= 3.3).")
            compressed = cursor.bytes(source, compressedbytes)
            return lzma_decompress(compressed)

        if algo == uproot.const.kOldCompressionAlgo:
            raise NotImplementedError("ROOT's \"old\" algorithm (fCompress 300) is not supported")

        if algo == uproot.const.kLZ4:
            try:
                from lz4.block import decompress as lz4_decompress
            except ImportError:
                raise ImportError("Install lz4 package with:\n    pip install lz4\nor\n    conda install -c anaconda lz4")

            if uncompressedbytes is None:
                raise ValueError("lz4 needs to know the uncompressed number of bytes")
            compressed = cursor.bytes(source, compressedbytes)
            return lz4_decompress(compressed, uncompressed_size=uncompressedbytes)

        raise ValueError("unrecognized compression algorithm: {0}".format(algo))
示例#4
0
    def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_images, columns):
        """Decode one version-3 bucket document into a dict of column arrays.

        :param doc: bucket document, indexed with the module-level field
            constants (VERSION, INDEX, COLUMNS, ROWMASK, DTYPE, DATA, ...).
        :param column_set: column names to load; updated in place with every
            column present in the document.
        :param column_dtypes: per-column dtype registry, promoted in place.
        :param include_symbol: when truthy, add a 'SYMBOL' entry repeating
            the document's symbol once per returned row.
        :param include_images: when truthy (and the doc carries an image),
            the result is passed through self._prepend_image.
        :param columns: forwarded unchanged to self._prepend_image.
        :return: dict mapping column name -> values (None for columns absent
            from this document), plus the INDEX entry.
        :raises ArcticException: if the document is not version 3.
        """
        rtn = {}
        if doc[VERSION] != 3:
            raise ArcticException("Unhandled document version: %s" % doc[VERSION])
        # np.cumsum copies the read-only array created with frombuffer
        # (index values appear delta-encoded; cumsum restores running totals)
        rtn[INDEX] = np.cumsum(np.frombuffer(lz4_decompress(doc[INDEX]), dtype='uint64'))
        doc_length = len(rtn[INDEX])
        column_set.update(doc[COLUMNS].keys())

        # get the mask for the columns we're about to load
        # (one bit per row, 8 rows packed per byte -- hence the ceil-divide)
        union_mask = np.zeros((doc_length + 7) // 8, dtype='uint8')
        for c in column_set:
            try:
                coldata = doc[COLUMNS][c]
                # the or below will make a copy of this read-only array
                mask = np.frombuffer(lz4_decompress(coldata[ROWMASK]), dtype='uint8')
                union_mask = union_mask | mask
            except KeyError:
                # column not present anywhere in this bucket
                rtn[c] = None
        union_mask = np.unpackbits(union_mask)[:doc_length].astype('bool')
        rtn_length = np.sum(union_mask)

        # keep only index entries for rows where at least one column has data
        rtn[INDEX] = rtn[INDEX][union_mask]
        if include_symbol:
            rtn['SYMBOL'] = [doc[SYMBOL], ] * rtn_length

        # Unpack each requested column in turn
        for c in column_set:
            try:
                coldata = doc[COLUMNS][c]
                dtype = np.dtype(coldata[DTYPE])
                # values ends up being copied by pandas before being returned to the user. However, we
                # copy it into a bytearray here for safety.
                values = np.frombuffer(bytearray(lz4_decompress(coldata[DATA])), dtype=dtype)
                self._set_or_promote_dtype(column_dtypes, c, dtype)
                rtn[c] = self._empty(rtn_length, dtype=column_dtypes[c])
                # unpackbits will make a copy of the read-only array created by frombuffer
                rowmask = np.unpackbits(np.frombuffer(lz4_decompress(coldata[ROWMASK]),
                                        dtype='uint8'))[:doc_length].astype('bool')
                # re-express the column's row mask relative to the union mask
                rowmask = rowmask[union_mask]
                rtn[c][rowmask] = values
            except KeyError:
                rtn[c] = None

        if include_images and doc.get(IMAGE_DOC, {}).get(IMAGE, {}):
            rtn = self._prepend_image(rtn, doc[IMAGE_DOC], rtn_length, column_dtypes, column_set, columns)
        return rtn
示例#5
0
def decompress_array(str_list):
    """
    Decompress a list of strings.

    Empty input is returned unchanged.  Small batches (or when parallelism
    is disabled) are decompressed inline; larger batches use the shared
    thread pool, which is created lazily on first use.
    """
    global _compress_thread_pool

    if not str_list:
        return str_list

    if not ENABLE_PARALLEL or len(str_list) <= LZ4_N_PARALLEL:
        return [lz4_decompress(chunk) for chunk in str_list]

    # BUG FIX: the pool was used without ever being created, unlike the
    # sibling implementations of this function which lazily build it here.
    if _compress_thread_pool is None:
        _compress_thread_pool = ThreadPool(LZ4_WORKERS)
    return _compress_thread_pool.map(lz4_decompress, str_list)
示例#6
0
def decompress_array(str_list):
    """Decompress every LZ4 chunk in *str_list*.

    Empty input comes back unchanged.  Small batches (or when parallelism
    is disabled) are handled inline; larger ones go through a lazily
    created, module-shared thread pool.
    """
    global _compress_thread_pool

    if not str_list:
        return str_list

    use_pool = ENABLE_PARALLEL and len(str_list) > LZ4_N_PARALLEL
    if not use_pool:
        return [lz4_decompress(chunk) for chunk in str_list]

    if _compress_thread_pool is None:
        _compress_thread_pool = ThreadPool(LZ4_WORKERS)
    return _compress_thread_pool.map(lz4_decompress, str_list)
示例#7
0
def decompress_array(str_list):
    """Decompress a list of LZ4-compressed byte strings.

    Falsy input is passed straight back.  Sequential decompression is used
    below the parallel threshold or when ENABLE_PARALLEL is off; otherwise
    work is mapped over the shared (lazily built) thread pool.
    """
    global _compress_thread_pool

    if not str_list:
        return str_list

    sequential = not ENABLE_PARALLEL or len(str_list) <= LZ4_N_PARALLEL
    if sequential:
        return list(map(lz4_decompress, str_list))

    pool = _compress_thread_pool
    if pool is None:
        pool = _compress_thread_pool = ThreadPool(LZ4_WORKERS)
    return pool.map(lz4_decompress, str_list)
示例#8
0
 def decode(self) -> MappingsBuilder:
     """Decode the binary mappings blob into a MappingsBuilder.

     Validates the magic header and format version, transparently
     decompresses an lz4-block payload when the stream requests it, then
     reads the class, method and field tables.

     Raises BinaryMappingsError for any malformed or unsupported stream.
     """
     try:
         header = self.read_nullterm()
     except BinaryMappingsError as e:
         raise BinaryMappingsError("Invalid header!") from e.__cause__
     if header != "SuperSrg binary mappings":
         raise BinaryMappingsError(f"Unexpected header: {header}")
     version = self.read_u32()
     if version != 1:
         raise BinaryMappingsError(f"Unexpected version: {version}")
     compression = self.read_string()
     if compression == "":
         # Continue to treat uncompressed data as-is
         pass
     elif compression == "lz4-block":
         if lz4_decompress is None:
             raise BinaryMappingsError(f"Missing lz4 compression module!")
         # BUG FIX: `index` was an undefined bare name (NameError at
         # runtime); the current read offset lives on the instance as
         # `self.index`.
         decompressed = lz4_decompress(self.data_view[self.index:])
         self.data = decompressed
         self.data_view = memoryview(decompressed)
         self.index = 0
     elif compression in ("lzma2", "gzip"):
         raise BinaryMappingsError(f"Unsupported compression: {compression}")
     else:
         raise BinaryMappingsError(f"Forbidden compression: {compression}")
     builder = MappingsBuilder()
     num_classes = self.read_u64()
     for _ in range(num_classes):
         original_class = JavaClass(self.read_string())
         revised_class_name = self.read_string()
         # An empty revised name means the class itself was not renamed.
         revised_class = JavaClass(revised_class_name) if revised_class_name else original_class
         num_methods = self.read_u32()
         for _ in range(num_methods):
             original_name = self.read_string()
             revised_name = self.read_string()
             original_signature = MethodSignature.parse(self.read_string())
             self.read_string()  # Ignore the revised signature
             original_data = MethodData(original_class, original_name, original_signature)
             builder.method_names[original_data] = intern(revised_name)
         num_fields = self.read_u32()
         for _ in range(num_fields):
             original_name = self.read_string()
             revised_name = self.read_string()
             original_data = FieldData(original_class, original_name)
             assert original_name != revised_name, f"Redundant field: {original_data}"
             builder.field_names[original_data] = intern(revised_name)
     return builder
示例#9
0
def test_performance_sequential(n, length):
    """Benchmark clz4 HC (sequential and array-parallel) against plain lz4."""
    payload = random_string(length)
    payloads = [payload] * n

    start = dt.now()
    [c.decompress(y) for y in [c.compressHC(x) for x in payloads]]
    clz4_time = (dt.now() - start).total_seconds()

    start = dt.now()
    c.decompress_array(c.compressHC_array(payloads))
    clz4_time_p = (dt.now() - start).total_seconds()

    start = dt.now()
    [lz4_decompress(y) for y in [lz4_compress(x) for x in payloads]]
    lz4_time = (dt.now() - start).total_seconds()

    print()
    print("LZ4 Test %sx len:%s" % (n, length))
    print("    LZ4 HC %s s" % clz4_time)
    print("    LZ4 HC Parallel %s s" % clz4_time_p)
    print("    LZ4 %s s" % lz4_time)
示例#10
0
def test_performance_sequential(n, length):
    """Time HC round-trips (sequential and array-parallel) and plain LZ4."""
    sample = random_string(length)
    samples = [sample for _ in range(n)]

    def _elapsed(work):
        # Run the callable and return its wall-clock duration in seconds.
        begin = dt.now()
        work()
        return (dt.now() - begin).total_seconds()

    clz4_time = _elapsed(
        lambda: [c.decompress(y) for y in [c.compressHC(x) for x in samples]])
    clz4_time_p = _elapsed(
        lambda: c.decompress_array(c.compressHC_array(samples)))
    lz4_time = _elapsed(
        lambda: [lz4_decompress(y) for y in [lz4_compress(x) for x in samples]])

    print()
    print("LZ4 Test %sx len:%s" % (n, length))
    print("    LZ4 HC %s s" % clz4_time)
    print("    LZ4 HC Parallel %s s" % clz4_time_p)
    print("    LZ4 %s s" % lz4_time)
示例#11
0
def _decompressfcn(compression, objlen, debug=False):
    """Return a callable that strips ROOT's frame header and decompresses.

    *compression* is an (algorithm-name, level) pair; *objlen* is the
    expected uncompressed size (used only by lz4).  With debug=True the
    returned callable also reports the compressed payload size.
    """
    algo, _level = compression

    if algo == "zlib":
        # skip 9-byte header for ROOT's custom frame:
        # https://github.com/root-project/root/blob/master/core/zip/src/Bits.h#L646
        if debug:
            def _zlib_verbose(x):
                print("decompressing {0} bytes".format(len(x) - 9))
                return zlib_decompress(x[9:])
            return _zlib_verbose
        return lambda x: zlib_decompress(x[9:])

    if algo == "lzma":
        # skip 9-byte header for LZMA, too:
        # https://github.com/root-project/root/blob/master/core/lzma/src/ZipLZMA.c#L81
        if debug:
            def _lzma_verbose(x):
                print("decompressing {0} bytes".format(len(x) - 9))
                return lzma_decompress(x[9:])
            return _lzma_verbose
        return lambda x: lzma_decompress(x[9:])

    if algo == "lz4":
        # skip 9-byte header plus 8-byte hash: are there any official ROOT versions without the hash?
        # https://github.com/root-project/root/blob/master/core/lz4/src/ZipLZ4.cxx#L38
        if debug:
            def _lz4_verbose(x):
                print("decompressing {0} bytes".format(len(x) - 9 - 8))
                return lz4_decompress(x[9 + 8:], uncompressed_size=objlen)
            return _lz4_verbose
        return lambda x: lz4_decompress(x[9 + 8:], uncompressed_size=objlen)

    raise NotImplementedError("cannot decompress \"{0}\"".format(algo))
示例#12
0
async def acquire_manifest(version, platform, asset_qual, sound_qual,
                           dest_file):
    """Download and decompress the asset manifest for the given selectors.

    Fetches the meta list for *version*, finds the entry matching
    *platform*/*asset_qual*/*sound_qual*, downloads that manifest, and
    writes the decompressed bytes to *dest_file*.

    Returns *dest_file* on success, or None when either fetch fails or no
    entry matches the selectors.
    """
    cl = httpclient.AsyncHTTPClient()
    meta_url = "/".join((DBMANIFEST.format(version), "all_dbmanifest"))
    try:
        meta = await cl.fetch(meta_url, headers=extra_acquisition_headers())
    except Exception as e:
        print("acquire_manifest: unhandled error while getting meta:", e)
        return None

    m = meta.body.decode("utf8")
    mp = map(lambda x: manifest_selector_t(*x.split(",")),
             filter(bool, m.split("\n")))
    get_file = None
    for selector in mp:
        if selector.platform == platform and \
           selector.asset_qual == asset_qual and \
           selector.sound_qual == sound_qual:
            get_file = selector.filename
            break
    else:
        # for/else: runs only when no selector matched (no break taken)
        print("No candidate found for", platform, asset_qual, sound_qual)
        return None

    abso = "/".join((DBMANIFEST.format(version), get_file))
    try:
        mani = await cl.fetch(abso, headers=extra_acquisition_headers())
    except Exception as e:
        # BUG FIX: this message previously said "while getting meta",
        # copy-pasted from the first fetch; it actually reports the
        # manifest fetch.
        print("acquire_manifest: unhandled error while getting manifest:", e)
        return None

    # Rebuild the LZ4 payload: keep bytes 4:8 (presumably the
    # uncompressed-size prefix lz4_decompress expects -- TODO confirm)
    # followed by the compressed stream starting at byte 16.
    buf = mani.buffer.read()
    bio = io.BytesIO()
    bio.write(buf[4:8])
    bio.write(buf[16:])
    data = lz4_decompress(bio.getvalue())
    with open(dest_file, "wb") as write_db:
        write_db.write(data)
    return dest_file
示例#13
0
def mozlz4_decompress(data):
    """Strip the 8-byte mozLz4 magic header and LZ4-decompress the rest.

    :param data: raw bytes of a mozlz4 file (e.g. Firefox session store).
    :raises ValueError: if the header is missing or wrong.  ValueError is a
        subclass of Exception, so callers catching the old bare Exception
        still work.
    """
    if len(data) < 8 or data[:8] != b'mozLz40\0':
        raise ValueError('Invalid mozlz4 header')
    return lz4_decompress(data[8:])
示例#14
0
def decompress(_str):
    """
    Decompress a single LZ4-compressed string and return the result.
    """
    decompressed = lz4_decompress(_str)
    return decompressed
示例#15
0
 def out(x):
     # Debug wrapper: report the compressed payload size (input minus the
     # 9-byte frame header and 8-byte hash), then decompress the remainder.
     # NOTE(review): relies on `objlen` and `lz4_decompress` being bound in
     # the enclosing scope -- confirm where this nested def actually lives.
     print("decompressing {0} bytes".format(len(x) - 9 - 8))
     return lz4_decompress(x[9 + 8:], uncompressed_size=objlen)
示例#16
0
def decompress(_str):
    """
    Decompress a string: thin pass-through to lz4_decompress.
    """
    return lz4_decompress(_str)