def test_compress_decompress_no_parallel():
    with patch('arctic._compression.clz4', sentinel.clz4), \
         patch('arctic._compression.ENABLE_PARALLEL', False), \
         patch('arctic._compression.lz4', wraps=lz4) as patch_lz4:
        # patching clz4 with sentinel will make accessing any clz4 function explode
        assert decompress(compress(b'Foo')) == b'Foo'
        assert patch_lz4.compress.call_args_list == [call(b'Foo')]
        assert patch_lz4.decompress.call_args_list == [call(compress(b'Foo'))]
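# Why the sentinel patch above "explodes": a mock sentinel is a bare placeholder
# object with no attributes, so any lookup of a clz4 function on it raises
# AttributeError. A minimal illustrative sketch, not part of the test suite:
from unittest.mock import sentinel as _sentinel

def _sentinel_demo():
    fake_clz4 = _sentinel.clz4
    try:
        fake_clz4.compress  # AttributeError: sentinel objects have no attributes
    except AttributeError:
        pass  # this is the failure the comment above relies on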
def test_compress_decompress_no_parallel(): with patch("arctic._compression.clz4", sentinel.clz4), patch("arctic._compression.ENABLE_PARALLEL", False), patch( "arctic._compression.lz4", wraps=lz4 ) as patch_lz4: # patching clz4 with sentinel will make accessing any clz4 function explode assert decompress(compress("Foo")) == "Foo" assert patch_lz4.compress.call_args_list == [call("Foo")] assert patch_lz4.decompress.call_args_list == [call(compress("Foo"))]
def test_exceptions():
    data = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    data = data[0:16]
    with pytest.raises(Exception) as e:
        c.decompress(data)
    assert ("decompressor wrote" in str(e.value).lower() or
            "corrupt input at" in str(e.value).lower() or
            "decompression failed: corrupt input" in str(e.value).lower())

    data = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    data = [data[0:16] for x in (1, 2, 3)]
    with pytest.raises(Exception) as e:
        c.decompress_array(data)
    assert ("decompressor wrote" in str(e.value).lower() or
            "corrupt input at" in str(e.value).lower() or
            "decompression failed: corrupt input" in str(e.value).lower())
def test_compress_array_no_parallel():
    a = [b'spam', b'egg', b'spamm', b'spammm']
    with patch('arctic._compression.clz4', sentinel.clz4), \
         patch('arctic._compression.ENABLE_PARALLEL', False), \
         patch('arctic._compression.lz4', wraps=lz4) as patch_lz4:
        assert decompress_array(compress_array(a)) == a
        assert patch_lz4.compress.call_args_list == [call(x) for x in a]
        assert patch_lz4.decompress.call_args_list == [call(compress(x)) for x in a]
def test_compress_array_no_parallel(): a = ["spam", "egg", "spamm", "spammm"] with patch("arctic._compression.clz4", sentinel.clz4), patch("arctic._compression.ENABLE_PARALLEL", False), patch( "arctic._compression.lz4", wraps=lz4 ) as patch_lz4: assert decompress_array(compress_array(a)) == a assert patch_lz4.compress.call_args_list == [call(x) for x in a] assert patch_lz4.decompress.call_args_list == [call(compress(x)) for x in a]
def test_compress_array_no_parallel(): a = [b'spam', b'egg', b'spamm', b'spammm'] with patch('arctic._compression.clz4', sentinel.clz4), \ patch('arctic._compression.ENABLE_PARALLEL', False), \ patch('arctic._compression.lz4', wraps=lz4) as patch_lz4: assert decompress_array(compress_array(a)) == a assert patch_lz4.compress.call_args_list == [call(x) for x in a] assert patch_lz4.decompress.call_args_list == [ call(compress(x)) for x in a ]
def test_write_object():
    arctic_lib = Mock()
    self = create_autospec(PickleStore)
    version = {'_id': ObjectId()}
    PickleStore.write(self, arctic_lib, version, 'sentinel.symbol',
                      sentinel.item, sentinel.previous_version)
    assert 'data' not in version
    assert version['blob'] == '__chunked__V2'

    coll = arctic_lib.get_top_level_collection.return_value
    assert coll.update_one.call_args_list == [
        call({'sha': checksum('sentinel.symbol',
                              {'segment': 0,
                               'data': Binary(compress(cPickle.dumps(sentinel.item, cPickle.HIGHEST_PROTOCOL)))}),
              'symbol': 'sentinel.symbol'},
             {'$set': {'segment': 0,
                       'data': Binary(compress(cPickle.dumps(sentinel.item, cPickle.HIGHEST_PROTOCOL)), 0)},
              '$addToSet': {'parent': version['_id']}},
             upsert=True)
    ]
def bench_single(repeats, _strarr, use_HC):
    # Arctic compress single
    measurements = []
    for i in range(repeats):
        now = dt.now()
        if use_HC:
            res = [c.compressHC(x) for x in _strarr]
        else:
            res = [c.compress(x) for x in _strarr]
        sample = (dt.now() - now).total_seconds()
        assert all(res)
        measurements.append(sample)
    return measurements
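# A minimal usage sketch for bench_single, assuming `c` is arctic._compression
# and `dt` is datetime.datetime as the function above implies; the payload
# count and size here are illustrative only.
import os

def _bench_single_demo():
    payloads = [os.urandom(4096) for _ in range(100)]  # 100 random 4 KiB blobs
    timings = bench_single(repeats=5, _strarr=payloads, use_HC=False)
    print('mean wall time per run: %.6fs' % (sum(timings) / len(timings)))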
def _segment_index(self, recarr, existing_index, start, new_segments):
    """
    Generate index of datetime64 -> item offset.

    Parameters
    ----------
    recarr: new data being written (or appended)
    existing_index: index field from the versions document of the previous version
    start: first (0-based) offset of the new data
    new_segments: list of offsets. Each offset is the row index of the last row
                  of a particular chunk relative to the start of the _original_
                  item. array(new_data) - segments = array(offsets in item)

    Returns
    -------
    Binary(compress(array([(index, datetime)])))
    where index is the 0-based index of the datetime in the DataFrame
    """
    # find the index of the first datetime64 column
    idx_col = self._datetime64_index(recarr)
    # if one exists let's create the index on it
    if idx_col is not None:
        new_segments = np.array(new_segments, dtype='i8')
        last_rows = recarr[new_segments - start]
        # create numpy index
        index = np.core.records.fromarrays([last_rows[idx_col]] + [new_segments, ],
                                           dtype=INDEX_DTYPE)
        # append to the existing index if one exists
        if existing_index:
            # existing_index_arr is read-only but it's never written to
            existing_index_arr = np.frombuffer(decompress(existing_index), dtype=INDEX_DTYPE)
            if start > 0:
                existing_index_arr = existing_index_arr[existing_index_arr['index'] < start]
            index = np.concatenate((existing_index_arr, index))
        return Binary(compress(index.tostring()))
    elif existing_index:
        raise ArcticException("Could not find datetime64 index in item but existing data contains one")
    return None
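# A sketch of decoding the blob _segment_index produces. Assumptions: this is
# illustrative, not arctic's actual read path, and the datetime field of
# INDEX_DTYPE is assumed to be named 'datetime' (the 'index' field name is
# taken from the code above). INDEX_DTYPE and decompress are passed in to keep
# the sketch self-contained.
import numpy as np

def _find_segment(index_blob, ts, INDEX_DTYPE, decompress):
    # rebuild the structured array of (datetime, row offset) pairs, ascending
    idx = np.frombuffer(decompress(index_blob), dtype=INDEX_DTYPE)
    # each entry holds the *last* timestamp of a segment, so the first entry
    # >= ts identifies the segment that would contain ts (a np.datetime64)
    pos = np.searchsorted(idx['datetime'], ts)
    return int(idx['index'][pos]) if pos < len(idx) else None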
def test_compress_empty_string():
    assert decompress(compress(b'')) == b''
def test_compress(): assert len(compress("foobar")) > 0
def test_decompress(): assert decompress(compress(b"foo")) == b"foo"
def test_compress():
    assert len(compress(b'foobar')) > 0
def test_compress_LZ4():
    cfn = Mock()
    with patch('arctic._compression.clz4.compress', cfn):
        compress(b"foo")
    assert cfn.call_count == 1
def test_decompress(): assert decompress(compress("foo")) == "foo"
def test_roundtrip_multi(n):
    _str = random_string(n)
    cstr = c.compress(_str)
    assert _str == c.decompress(cstr)
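# random_string is not defined in this section; a hypothetical implementation
# consistent with its use above (c.compress operates on bytes) might be:
import random
import string

def random_string(n):
    # hypothetical helper: n random ASCII letters/digits, encoded to bytes
    return ''.join(random.choice(string.ascii_letters + string.digits)
                   for _ in range(n)).encode('ascii')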
def test_compress_LZ4HC():
    use_lz4hc(True)
    cfn = Mock()
    with patch('arctic._compression.clz4.compressHC', cfn):
        compress(b"foo")
    assert cfn.call_count == 1
def test_compress_LZ4():
    cfn = Mock()
    with patch('arctic._compression.lz4_compress', cfn):
        compress(b"foo")
    assert cfn.call_count == 1
def test_compress_LZ4():
    use_lz4hc(False)
    cfn = Mock()
    with patch('arctic._compression.clz4.compress', cfn):
        compress(b"foo")
    assert cfn.call_count == 1