def test_decompress_trailer(self):
    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
    compressed = lz4frame.compress(input_data)
    with self.assertRaisesRegexp(ValueError, r'^Extra data: 64 trailing bytes'):
        lz4frame.decompress(compressed + b'A' * 64)
    # This API does not support frame concatenation!
    with self.assertRaisesRegexp(ValueError, r'^Extra data: \d+ trailing bytes'):
        lz4frame.decompress(compressed + compressed)
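# A minimal sketch (not part of the test suite above) of how concatenated frames
# can be handled with the streaming object API, since the one-shot
# lz4.frame.decompress() rejects trailing data. Assumes a python-lz4 version
# whose LZ4FrameDecompressor exposes `eof` and `unused_data`; the function name
# decompress_concatenated is illustrative.
import lz4.frame as lz4frame

def decompress_concatenated(buf):
    out = b''
    while buf:
        d = lz4frame.LZ4FrameDecompressor()
        out += d.decompress(buf)
        # Bytes following the end of the current frame are reported in
        # unused_data; feed them to a fresh decompressor for the next frame.
        buf = d.unused_data if d.eof else b''
    return out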
def test_checksum_failure(self):
    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
    compressed = lz4frame.compress(
        input_data, content_checksum=lz4frame.CONTENTCHECKSUM_ENABLED)
    with self.assertRaisesRegexp(
            RuntimeError,
            r'^LZ4F_decompress failed with code: ERROR_contentChecksum_invalid'
    ):
        last = struct.unpack('B', compressed[-1:])[0]
        lz4frame.decompress(compressed[:-1] + struct.pack('B', last ^ 0x42))
def test_block_checksum_failure(data):
    compressed = lz4frame.compress(
        data,
        content_checksum=True,
        block_checksum=True,
        return_bytearray=True,
    )
    message = r'^LZ4F_decompress failed with code: ERROR_blockChecksum_invalid$'
    if len(compressed) > 32:
        # Corrupt a byte inside the first block; the block checksum check
        # should then fail on decompression.
        with pytest.raises(RuntimeError, match=message):
            compressed[18] = compressed[18] ^ 0x42
            lz4frame.decompress(compressed)
def test_compress_without_content_size(self):
    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
    compressed = lz4frame.compress(input_data, content_size_header=False)
    frame = lz4frame.get_frame_info(compressed)
    self.assertEqual(frame['contentSize'], 0)
    decompressed = lz4frame.decompress(compressed)
    self.assertEqual(input_data, decompressed)
def update_network(self, minibatchs):
    indices_all, priorities_all, losses = [], [], []

    with util.Timer("Learner update:"):
        for (indices, weights, samples) in minibatchs:
            samples = [pickle.loads(lz4f.decompress(s)) for s in samples]
            priorities, loss_info = self.update(weights, samples)
            indices_all += indices
            priorities_all += priorities
            losses.append(loss_info)

    current_weights = self.get_weights()

    total_loss = sum([l[0] for l in losses]) / len(losses)
    policy_loss = sum([l[1] for l in losses]) / len(losses)
    value_loss = sum([l[2] for l in losses]) / len(losses)
    reward_loss = sum([l[3] for l in losses]) / len(losses)
    losses_mean = (total_loss, policy_loss, value_loss, reward_loss)

    return (current_weights, indices_all, priorities_all, losses_mean)
def test_spark_functionality():
    pyspark = pytest.importorskip("pyspark", minversion="2.4.1")

    import lz4.frame as lz4f
    import cloudpickle as cpkl
    from coffea import hist
    from coffea.processor.spark.tests import check_spark_functionality

    one, two, hists = check_spark_functionality()
    # check that spark produced the correct output dataset
    assert ((one == two) and (one == 3))
    # make sure that the returned histograms are all the same (as they should be in this case)
    assert ((len(hists[0]) == len(hists[1])) and (len(hists[1]) == len(hists[2])))

    inflated = []
    for i in range(3):
        inflated.append(cpkl.loads(lz4f.decompress(hists[i])))

    final = inflated.pop()
    for ihist in inflated:
        final.add(ihist)

    # make sure the accumulator is working right after being blobbed through spark
    assert (final['cutflow']['dummy'] == 3)
def nope_test_spark_functionality():
    try:
        import pyspark
    except ImportError:
        warnings.warn('pyspark not installed, skipping tests')
        return
    except Exception as e:
        warnings.warn('other error when trying to import pyspark!')
        raise e

    import lz4.frame as lz4f
    import cloudpickle as cpkl
    from fnal_column_analysis_tools import hist
    from fnal_column_analysis_tools.processor.spark.tests import check_spark_functionality

    one, two, hists = check_spark_functionality()
    # check that spark produced the correct output dataset
    assert ((one == two) and (one == 3))
    # make sure that the returned histograms are all the same (as they should be in this case)
    assert ((len(hists[0]) == len(hists[1])) and (len(hists[1]) == len(hists[2])))

    inflated = []
    for i in range(3):
        inflated.append(cpkl.loads(lz4f.decompress(hists[i])))

    final = inflated.pop()
    for ihist in inflated:
        final.add(ihist)

    # make sure the accumulator is working right after being blobbed through spark
    assert (final['cutflow']['dummy'] == 3)
def __call__(self, dfk, items, processor_instance, output,
             unit='items', desc='Processing', timeout=None, flatten=True):
    procstr = lz4f.compress(cpkl.dumps(processor_instance))

    nitems = len(items)
    ftr_to_item = set()
    for dataset, fn, treename, chunksize, index in items:
        if dataset not in self._counts:
            self._counts[dataset] = 0
        ftr_to_item.add(
            coffea_pyapp(dataset, fn, treename, chunksize, index, procstr,
                         timeout=timeout, flatten=flatten))

    for ftr in tqdm(as_completed(ftr_to_item), total=nitems, unit=unit, desc=desc):
        blob, nentries, dataset = ftr.result()
        self._counts[dataset] += nentries
        output.add(pkl.loads(lz4f.decompress(blob)))
def _work_function(item, processor_instance, flatten=False, savemetrics=False, mmap=False):
    if processor_instance == 'heavy':
        item, processor_instance = item
    if not isinstance(processor_instance, ProcessorABC):
        processor_instance = cloudpickle.loads(lz4f.decompress(processor_instance))

    if mmap:
        localsource = {}
    else:
        opts = dict(uproot.FileSource.defaults)
        opts.update({'parallel': None})

        def localsource(path):
            return uproot.FileSource(path, **opts)

    file = uproot.open(item.filename, localsource=localsource)
    tree = file[item.treename]
    df = LazyDataFrame(tree, item.chunksize, item.index, flatten=flatten)
    df['dataset'] = item.dataset

    tic = time.time()
    out = processor_instance.process(df, locals())
    toc = time.time()

    metrics = dict_accumulator()
    if savemetrics:
        if isinstance(file.source, uproot.source.xrootd.XRootDSource):
            metrics['bytesread'] = value_accumulator(int, file.source.bytesread)
            metrics['dataservers'] = set_accumulator({file.source._source.get_property('DataServer')})
        metrics['columns'] = set_accumulator(df.materialized)
        metrics['entries'] = value_accumulator(int, df.size)
        metrics['processtime'] = value_accumulator(float, toc - tic)
    wrapped_out = dict_accumulator({'out': out, 'metrics': metrics})
    file.source.close()
    return wrapped_out
def extract_yaml_reports(filename):
    """Fetch compressed tarball from S3 private repo and
    yield any YAML report
    """
    # Local caching - disabled
    # if os.path.isfile(filename):
    #     with open(filename, "rb") as f:
    #         fileobj = BytesIO(f.read())
    #     fileobj.seek(0)
    # else:
    #     obj = s3.Object("ooni-data-private", "canned/{}".format(filename))
    #     fileobj = BytesIO(lz4frame.decompress(obj.get()["Body"].read()))
    #     with open(filename, "wb") as f:
    #         fileobj.seek(0)
    #         f.write(fileobj.read())
    #     fileobj.seek(0)
    obj = s3.Object("ooni-data-private", "canned/{}".format(filename))
    fileobj = BytesIO(lz4frame.decompress(obj.get()["Body"].read()))
    tf = tarfile.TarFile(fileobj=fileobj)
    for m in tf.getmembers():
        if m.name.endswith(".yaml"):
            f = tf.extractfile(m)
            canned_yaml = BytesIO(f.read())
            yield (m.name, canned_yaml)
def reduce_histos_raw(df, processor_instance, lz4_clevel):
    histos = df['histos']
    mask = (histos.str.len() > 0)
    outhist = processor_instance.accumulator.identity()
    for line in histos[mask]:
        outhist.add(pkl.loads(lz4f.decompress(line)))
    return pd.DataFrame(data={'histos': np.array([lz4f.compress(pkl.dumps(outhist),
                                                                compression_level=lz4_clevel)],
                                                 dtype='O')})
def generator_data(self, batch_size=20, args={'type': "train", 'id': {"$lt": 500}}):
    pc = self.db.DataSet.find(args)
    flist = pc.distinct('imageData')
    fldict = {}
    for f in flist:
        s = self.datafs.get(f).read()
        s2 = decompress(s)
        fldict[f] = pickle.loads(s2)
    print(pc.count())
    for i in range(0, pc.count(), batch_size):
        pc.rewind()
        rt = [(fldict[x['imageData']][x['id']].reshape(1, 784), x['label'])
              for x in pc[i:i + batch_size]]
        dl = list(zip(*rt))
        d = np.concatenate(dl[0])
        l = np.array(dl[1])
        yield d, l
def find_one_params(self, args={}, sort=None, lz4_decomp=False):
    """Find one parameter from MongoDB Buckets.

    Parameters
    ----------
    args : dictionary, find items.

    Returns
    -------
    params : the parameters, return False if nothing found.
    f_id : the Buckets ID of the parameters, return False if nothing found.
    """
    d = self.db.Params.find_one(filter=args, sort=sort)

    if d is not None:
        f_id = d['f_id']
    else:
        print("[TensorDB] Cannot find: {}".format(args))
        return False, False

    st = time.time()
    d = self.paramsfs.get(f_id).read()
    # print('get time', time.time() - st)
    if lz4_decomp:
        # s = time.time()
        d = decompress(d)
        # print('decomp time', time.time() - s)

    # s = time.time()
    params = self.__deserialization(d)
    # print('deseri time', time.time() - s)
    print("[TensorDB] Find one params SUCCESS, {} took: {}s".format(args, round(time.time() - st, 2)))
    return params, f_id
def replace_masks(in_db, out_db, synonyms_file):
    synonyms_dict = get_synonyms_dict(synonyms_file)
    tok = BertTokenizer.from_pretrained('bert-base-cased')

    new_set = lmdb.open(out_db, readonly=False, create=True, map_size=10e9)
    new_set_c = new_set.begin(write=True)
    env = lmdb.open(in_db, readonly=True, create=False, map_size=4 * 1024**4)
    txn = env.begin()
    cursor = txn.cursor()

    tokenized_queries = {}
    id2len_dict = {}
    for key, value in tqdm(cursor):
        q_id = key.decode()
        q = msgpack.loads(decompress(value))
        query_tokens, query_ids = replace_masks_in_q(q_id, q['input_ids'], synonyms_dict, tok)
        q['toked_question'] = query_tokens
        q['input_ids'] = query_ids
        tokenized_queries[q_id] = q
        id2len_dict[q_id] = len(query_ids)
        new_q = compress(msgpack.dumps(q))
        new_set_c.put(key, new_q)

    print("committing changes")
    new_set_c.commit()
    with open('{}/id2len.json'.format(out_db), 'w') as outfile:
        json.dump(id2len_dict, outfile)
def agg_histos(df):
    global processor_instance, lz4_clevel
    goodlines = df[df.str.len() > 0]
    outhist = processor_instance.accumulator.identity()
    for line in goodlines:
        outhist.add(cpkl.loads(lz4f.decompress(line)))
    return lz4f.compress(cpkl.dumps(outhist), compression_level=lz4_clevel)
def decompress_segments(minibatch):
    indices, weights, compressed_segments = minibatch
    segments = [
        pickle.loads(lz4f.decompress(compressed_seg))
        for compressed_seg in compressed_segments
    ]
    return (indices, weights, segments)
def test_roundtrip_1(data, block_size, block_linked, content_checksum,
                     block_checksum, compression_level, store_size):
    compressed = lz4frame.compress(
        data,
        store_size=store_size,
        compression_level=compression_level,
        block_size=block_size,
        block_linked=block_linked,
        content_checksum=content_checksum,
        block_checksum=block_checksum,
    )
    get_frame_info_check(
        compressed,
        len(data),
        store_size,
        block_size,
        block_linked,
        content_checksum,
        block_checksum,
    )
    decompressed, bytes_read = lz4frame.decompress(
        compressed, return_bytes_read=True)
    assert bytes_read == len(compressed)
    assert decompressed == data
def find_all_params(self, args={}, lz4_decomp=False):
    """Find all parameters from MongoDB Buckets.

    Parameters
    ----------
    args : dictionary, find items.

    Returns
    -------
    params : the parameters, return False if nothing found.
    """
    st = time.time()
    pc = self.db.Params.find(args)

    if pc is not None:
        f_id_list = pc.distinct('f_id')
        params = []
        for f_id in f_id_list:  # you may have multiple Buckets files
            tmp = self.paramsfs.get(f_id).read()
            if lz4_decomp:
                tmp = decompress(tmp)
            params.append(self.__deserialization(tmp))
    else:
        print("[TensorDB] Cannot find: {}".format(args))
        return False

    print("[TensorDB] Find all params SUCCESS, took: {}s".format(
        round(time.time() - st, 2)))
    return params
def test_parsl_funcs():
    parsl = pytest.importorskip("parsl", minversion="0.7.2")

    import os.path as osp
    from coffea.processor.parsl.detail import derive_chunks

    filename = osp.abspath('tests/samples/nano_dy.root')
    dataset = 'Z+Jets'
    treename = 'Events'
    chunksize = 20
    ds, tn, test = derive_chunks.func(filename, treename, chunksize, dataset)

    assert (dataset == ds)
    assert (treename == tn)
    assert ('nano_dy.root' in test[0][0])
    assert (test[0][1] == 20)
    assert (test[0][2] == 0)

    from coffea.processor.parsl.parsl_executor import coffea_pyapp
    from coffea.processor.test_items import NanoTestProcessor
    import pickle as pkl
    import cloudpickle as cpkl
    import lz4.frame as lz4f

    procpkl = lz4f.compress(cpkl.dumps(NanoTestProcessor()))
    out = coffea_pyapp.func('ZJets', filename, treename, chunksize, 0, procpkl)
    hists = pkl.loads(lz4f.decompress(out[0]))

    assert (hists['cutflow']['ZJets_pt'] == 4)
    assert (hists['cutflow']['ZJets_mass'] == 1)
    assert (out[1] == 10)
    assert (out[2] == 'ZJets')
def test_LZ4FrameCompressor(self):
    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
    with lz4frame.LZ4FrameCompressor() as compressor:
        compressed = compressor.compress(input_data)
        compressed += compressor.flush()
    decompressed = lz4frame.decompress(compressed)
    self.assertEqual(input_data, decompressed)
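# Sketch only, not part of the test above: the same frame format can also be
# written and read through the file-like helper lz4.frame.open(), assuming a
# python-lz4 version that provides it (2.x). The function name and path are
# illustrative.
import lz4.frame

def roundtrip_file(payload, path='/tmp/example.lz4'):
    # Write one LZ4 frame to disk, then read it back and return the
    # decompressed bytes.
    with lz4.frame.open(path, mode='wb') as f:
        f.write(payload)
    with lz4.frame.open(path, mode='rb') as f:
        return f.read()

# roundtrip_file(b'some bytes') should return b'some bytes'.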
def __call__(self, spark, dfslist, theprocessor, output, thread_workers,
             status=True, unit='datasets', desc='Processing'):
    # processor needs to be a global
    global processor_instance, coffea_udf
    processor_instance = theprocessor
    # get columns from processor
    columns = processor_instance.columns
    cols_w_ds = ['dataset'] + columns
    # make our udf
    tmpl = self._env.get_template(self._template_name)
    render = tmpl.render(cols=columns)
    exec(render)

    # cache the input datasets if it's not already done
    if self._cacheddfs is None:
        self._cacheddfs = {}
        self._counts = {}
        # go through each dataset and thin down to the columns we want
        for ds, (df, counts) in dfslist.items():
            self._cacheddfs[ds] = df.select(*cols_w_ds).cache()
            self._counts[ds] = counts

    def spex_accumulator(total, result):
        ds, df = result
        total[ds] = df

    with ThreadPoolExecutor(max_workers=thread_workers) as executor:
        futures = set()
        for ds, df in self._cacheddfs.items():
            futures.add(
                executor.submit(self._launch_analysis, ds, df, coffea_udf, cols_w_ds))
        # wait for the spark jobs to come in
        self._rawresults = {}
        futures_handler(futures, self._rawresults, status, unit, desc,
                        futures_accumulator=spex_accumulator)

    for ds, bitstream in self._rawresults.items():
        if bitstream is None:
            raise Exception(
                'No pandas dataframe returned from spark in dataset: %s, something went wrong!' % ds)
        if bitstream.empty:
            raise Exception(
                'The histogram list returned from spark is empty in dataset: %s, something went wrong!' % ds)
        bits = bitstream[bitstream.columns[0]][0]
        output.add(pkl.loads(lz4f.decompress(bits)))
def test_lz4(self):
    data = self._data
    compress = verify_and_get_compress_func("lz4")
    self.assertIsNotNone(compress)

    import lz4.frame as lz4
    self.assertEqual(data, lz4.decompress(compress(data)))
def calc_unpacked_data(self, byte_data):
    if lz4 is None:
        raise errors.InvalidAlgorithm("lz4 module needed for .lz4 support")
    try:
        unpacked = lz4.decompress(bytes(byte_data))
    except RuntimeError as e:
        raise errors.InvalidAlgorithm(e)
    return unpacked
def test_decompress_return_type_2():
    c = lz4frame.compress(b'', return_bytearray=False)
    r = lz4frame.decompress(
        c,
        return_bytearray=True,
        return_bytes_read=False
    )
    assert isinstance(r, bytearray)
def _maybe_decompress(item):
    if isinstance(item, bytes):
        item = cloudpickle.loads(lz4f.decompress(item))
    if isinstance(item, AccumulatorABC):
        return item
    raise ValueError(
        "Executors can only reduce accumulators or LZ4-compressed pickled accumulators"
    )
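# Hypothetical producer side for _maybe_decompress() above, shown only to
# illustrate the expected wire format: an accumulator may be shipped either
# as-is or as an LZ4-compressed cloudpickle blob. The helper name make_blob is
# not from the source.
import lz4.frame as lz4f
import cloudpickle

def make_blob(accumulator):
    # Pickle the accumulator and compress it so it travels cheaply between
    # workers and the reducing process.
    return lz4f.compress(cloudpickle.dumps(accumulator))

# Both _maybe_decompress(make_blob(acc)) and _maybe_decompress(acc) should
# then yield an AccumulatorABC instance.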
def loads(s):
    try:
        return msgpack.unpackb(
            decompress(s), ext_hook=decode_ext, encoding='utf-8')
    except Exception:
        # we queue work occasionally from lambdas or other systems not using
        # the worker class
        return job_default_load(s)
def agg_histos_raw(series, processor_instance, lz4_clevel):
    goodlines = series[series.str.len() > 0]
    if goodlines.size == 1:  # short-circuit trivial aggregations
        return goodlines[0]
    outhist = processor_instance.accumulator.identity()
    for line in goodlines:
        outhist.add(pkl.loads(lz4f.decompress(line)))
    return lz4f.compress(pkl.dumps(outhist), compression_level=lz4_clevel)
def dir_delta(sig, new_path):
    with open(RDIFF_SIG_FILENAME, "wb") as f:
        f.write(decompress(sig))
    p = subprocess.run(["rdiffdir", "delta", RDIFF_SIG_FILENAME, new_path, "-"],
                       stdout=subprocess.PIPE, input=sig)
    os.remove(RDIFF_SIG_FILENAME)
    return compress(p.stdout)
def test_decompress_truncated(self):
    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
    for chksum in (lz4frame.CONTENTCHECKSUM_DISABLED, lz4frame.CONTENTCHECKSUM_ENABLED):
        for conlen in (0, len(input_data)):
            context = lz4frame.create_compression_context()
            compressed = lz4frame.compress_begin(context, content_checksum=chksum,
                                                 source_size=conlen)
            compressed += lz4frame.compress_update(context, input_data)
            compressed += lz4frame.compress_end(context)
            for i in range(len(compressed)):
                with self.assertRaisesRegexp(
                        RuntimeError,
                        r'^(LZ4F_getFrameInfo failed with code: ERROR_frameHeader_incomplete|LZ4F_freeDecompressionContext reported unclean decompressor state \(truncated frame\?\): \d+)$'
                ):
                    lz4frame.decompress(compressed[:i])
def coffea_pyapp(dataset, fn, treename, chunksize, index, procstr, timeout=None, flatten=True):
    import uproot
    import cloudpickle as cpkl
    import pickle as pkl
    import lz4.frame as lz4f
    from coffea import hist, processor
    from coffea.processor.accumulator import value_accumulator

    uproot.XRootDSource.defaults["parallel"] = False

    lz4_clevel = 1

    # instrument xrootd source
    if not hasattr(uproot.source.xrootd.XRootDSource, '_read_real'):
        def _read(self, chunkindex):
            self.bytesread = getattr(self, 'bytesread', 0) + self._chunkbytes
            return self._read_real(chunkindex)

        uproot.source.xrootd.XRootDSource._read_real = uproot.source.xrootd.XRootDSource._read
        uproot.source.xrootd.XRootDSource._read = _read

    processor_instance = cpkl.loads(lz4f.decompress(procstr))

    afile = uproot.open(fn)

    tree = None
    if isinstance(treename, str):
        tree = afile[treename]
    elif isinstance(treename, Sequence):
        for name in reversed(treename):
            if name in afile:
                tree = afile[name]
    else:
        raise Exception('treename must be a str or Sequence but is a %s!' % repr(type(treename)))

    if tree is None:
        raise Exception('No tree found, out of possible tree names: %s' % repr(treename))

    df = processor.LazyDataFrame(tree, chunksize, index, flatten=flatten)
    df['dataset'] = dataset
    vals = processor_instance.process(df)
    if isinstance(afile.source, uproot.source.xrootd.XRootDSource):
        vals['_bytesread'] = value_accumulator(int) + afile.source.bytesread
    valsblob = lz4f.compress(pkl.dumps(vals), compression_level=lz4_clevel)

    return valsblob, df.size, dataset
def unpack_data(dump, w=None, h=None):
    if dump is None:
        return None
    decompressed = zs.decompress(dump)
    fast_decompressed = lz.decompress(decompressed)
    if fast_decompressed[:5] == b'Chain':
        return pickle.loads(fast_decompressed[5:])
    else:
        return np.reshape(np.frombuffer(fast_decompressed), newshape=(h, w))
def test_decompress_return_type_3():
    c = lz4frame.compress(b'', return_bytearray=False)
    r = lz4frame.decompress(
        c,
        return_bytearray=False,
        return_bytes_read=True
    )
    assert isinstance(r, tuple)
    assert isinstance(r[0], bytes)
    assert isinstance(r[1], int)
def test_roundtrip_multiframe_1(data):
    nframes = 4

    compressed = b''
    for _ in range(nframes):
        compressed += lz4frame.compress(data)

    decompressed = b''
    for _ in range(nframes):
        decompressed += lz4frame.decompress(compressed)

    assert len(decompressed) == nframes * len(data)
    assert data * nframes == decompressed
def test_roundtrip_multiframe_2(data):
    nframes = 4

    compressed = b''
    ctx = lz4frame.create_compression_context()
    for _ in range(nframes):
        compressed += lz4frame.compress_begin(ctx)
        compressed += lz4frame.compress_chunk(ctx, data)
        compressed += lz4frame.compress_flush(ctx)

    decompressed = b''
    for _ in range(nframes):
        decompressed += lz4frame.decompress(compressed)

    assert len(decompressed) == nframes * len(data)
    assert data * nframes == decompressed
def test_roundtrip_2(data, block_size, block_linked, content_checksum, block_checksum,
                     compression_level, auto_flush, store_size):
    c_context = lz4frame.create_compression_context()

    kwargs = {}
    kwargs['compression_level'] = compression_level
    kwargs['block_size'] = block_size
    kwargs['block_linked'] = block_linked
    kwargs['content_checksum'] = content_checksum
    kwargs['block_checksum'] = block_checksum
    kwargs['auto_flush'] = auto_flush
    if store_size is True:
        kwargs['source_size'] = len(data)

    compressed = lz4frame.compress_begin(
        c_context,
        **kwargs
    )
    compressed += lz4frame.compress_chunk(
        c_context,
        data
    )
    compressed += lz4frame.compress_flush(c_context)

    get_frame_info_check(
        compressed,
        len(data),
        store_size,
        block_size,
        block_linked,
        content_checksum,
        block_checksum,
    )

    decompressed, bytes_read = lz4frame.decompress(
        compressed, return_bytes_read=True)
    assert bytes_read == len(compressed)
    assert decompressed == data
def test_decompress_truncated(data):
    compressed = lz4frame.compress(data)

    message = r'^LZ4F_getFrameInfo failed with code: ERROR_frameHeader_incomplete'
    with pytest.raises(RuntimeError, match=message):
        lz4frame.decompress(compressed[:6])

    for i in range(16, len(compressed) - 1, 5):  # 15 is the max size of the header
        message = r'^Frame incomplete. LZ4F_decompress returned: {0}'.format(
            len(compressed) - i)
        try:
            lz4frame.decompress(compressed[:i])
        except RuntimeError as r:
            print(r)
        with pytest.raises(RuntimeError, match=message):
            lz4frame.decompress(compressed[:i])
def stream_datum(atclv_root, bucket, take_file=None):
    with gzip.GzipFile(os.path.join(atclv_root, bucket, autoclaving.INDEX_FNAME), 'r') as indexfd:
        filefd = None
        dociter = autoclaving.stream_json_blobs(indexfd)
        for _, doc in dociter:
            doc = ujson.loads(doc)
            t = doc['type']
            if t == 'datum':
                # {"orig_sha1": "q7…I=", "text_off": 156846, "text_size": 58327, "type": "datum"}
                intra_off = doc['text_off'] - text_off
                datum = blob[intra_off:intra_off + doc['text_size']]
                assert intra_off >= 0 and len(datum) == doc['text_size']
                datum = ujson.loads(datum)
                doc['frame_off'] = frame_off
                doc['frame_size'] = frame_size
                doc['intra_off'] = intra_off
                doc['intra_size'] = doc['text_size']
                doc['datum'] = datum
                yield DATUM, doc
                del intra_off, datum

            elif t == 'frame':
                # {"file_off": 0, "file_size": 162864, "text_off": 0, "text_size": 362462, … }
                frame_off, frame_size = doc['file_off'], doc['file_size']
                assert filefd.tell() == frame_off
                blob = filefd.read(frame_size)
                assert len(blob) == frame_size
                blob = lz4frame.decompress(blob)
                assert len(blob) == doc['text_size']
                text_off = doc['text_off']

            elif t == '/frame':
                del frame_off, frame_size, text_off, blob

            elif t == 'report':
                # {"orig_sha1": "HO…U=",
                #  "src_size": 104006450,
                #  "textname": "2017-01-01/20161231T000030Z-US-AS…-0.2.0-probe.json", …}
                yield REPORT_START, doc

            elif t == '/report':
                # {"info": "<class '__main__.TruncatedReportError'>",
                #  "src_cutoff": 49484700, … }
                yield REPORT_END, doc

            elif t == 'file':
                # {"filename": "2017-01-01/20161231T000030Z-US-AS…-0.2.0-probe.json.lz4", …}
                filename = doc['filename']
                assert filename.startswith(bucket)
                if take_file is None or take_file(filename):
                    filefd = open(os.path.join(atclv_root, filename), 'rb')
                    del filename
                    yield FILE_START, doc
                else:
                    for _, skipdoc in dociter:
                        if '/file"' in skipdoc and ujson.loads(skipdoc)['type'] == '/file':
                            break
                    del filename, skipdoc

            elif t == '/file':
                # {"file_crc32": -156566611, "file_sha1": "q/…8=", "file_size": 18132131, …}
                assert filefd.tell() == doc['file_size']
                filefd.close()
                filefd = None
                yield FILE_END, doc

            elif t == 'badblob':
                # {"orig_sha1": "RXQFwOtpKtS0KicYi8JnWeQYYBw=",
                #  "src_off": 99257, "src_size": 238,
                #  "info": "<class 'yaml.constructor.ConstructorError'>", …}
                yield BADBLOB, doc

            else:
                raise RuntimeError('Unknown record type', t)

        if filefd is not None:
            raise RuntimeError('Truncated autoclaved index', atclv_root, bucket)
def test_content_checksum_failure(data):
    compressed = lz4frame.compress(data, content_checksum=True)
    message = r'^LZ4F_decompress failed with code: ERROR_contentChecksum_invalid$'
    with pytest.raises(RuntimeError, match=message):
        last = struct.unpack('B', compressed[-1:])[0]
        lz4frame.decompress(compressed[:-1] + struct.pack('B', last ^ 0x42))