Пример #1
0
def test_compress_decompress_no_parallel():
    """Round-trip bytes through the pure-python lz4 path with parallelism off."""
    # clz4 is replaced by a sentinel: touching any of its attributes would
    # explode, proving the wrapped python lz4 module handles the round trip.
    with patch('arctic._compression.clz4', sentinel.clz4):
        with patch('arctic._compression.ENABLE_PARALLEL', False):
            with patch('arctic._compression.lz4', wraps=lz4) as lz4_spy:
                roundtripped = decompress(compress(b'Foo'))
                assert roundtripped == b'Foo'
                assert lz4_spy.compress.call_args_list == [call(b'Foo')]
                expected_decompress = [call(compress(b'Foo'))]
                assert lz4_spy.decompress.call_args_list == expected_decompress
Пример #2
0
def test_compress_decompress_no_parallel():
    """Round-trip a string through the wrapped lz4 module, serial path only."""
    # Accessing any attribute on the sentinel clz4 would raise, so a passing
    # test shows only the python lz4 fallback was exercised.
    with patch("arctic._compression.clz4", sentinel.clz4), patch(
        "arctic._compression.ENABLE_PARALLEL", False
    ), patch("arctic._compression.lz4", wraps=lz4) as lz4_wrapper:
        result = decompress(compress("Foo"))
        assert result == "Foo"
        assert [call("Foo")] == lz4_wrapper.compress.call_args_list
        assert [call(compress("Foo"))] == lz4_wrapper.decompress.call_args_list
Пример #3
0
def test_compress_decompress_no_parallel():
    """Verify compress/decompress fall back to the python lz4 module serially."""
    with patch('arctic._compression.clz4', sentinel.clz4), \
         patch('arctic._compression.ENABLE_PARALLEL', False), \
         patch('arctic._compression.lz4', wraps=lz4) as wrapped_lz4:
        # The sentinel clz4 guarantees the C implementation is never touched.
        original = b'Foo'
        assert decompress(compress(original)) == original
        assert wrapped_lz4.compress.call_args_list == [call(original)]
        assert wrapped_lz4.decompress.call_args_list == [call(compress(original))]
Пример #4
0
def test_exceptions():
    """Truncated lz4 payloads must raise, for both single and array decompress."""

    def _looks_corrupt(excinfo):
        # Different lz4 versions phrase the corruption error differently;
        # accept any of the known messages.
        msg = str(excinfo.value).lower()
        return ("decompressor wrote" in msg
                or "corrupt input at" in msg
                or "decompression failed: corrupt input" in msg)

    payload = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    truncated = payload[0:16]
    with pytest.raises(Exception) as e:
        c.decompress(truncated)
    assert _looks_corrupt(e)

    payload = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    truncated_chunks = [payload[0:16] for _ in (1, 2, 3)]
    with pytest.raises(Exception) as e:
        c.decompress_array(truncated_chunks)
    assert _looks_corrupt(e)
Пример #5
0
def test_exceptions():
    """Corrupt (truncated) lz4 data must raise, both singly and in arrays.

    Fix: the original asserted against ``str(e)`` where ``e`` is pytest's
    ExceptionInfo wrapper — that stringifies the wrapper (repr/location), not
    the exception message. The message lives on ``e.value``.
    """
    data = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    data = data[0:16]
    with pytest.raises(Exception) as e:
        c.decompress(data)
    # Match any of the corruption messages emitted by different lz4 versions.
    assert("decompressor wrote" in str(e.value).lower() or "corrupt input at" in str(e.value).lower() or "decompression failed: corrupt input" in str(e.value).lower())

    data = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    data = [data[0:16] for x in (1, 2, 3)]
    with pytest.raises(Exception) as e:
        c.decompress_array(data)
    assert ("decompressor wrote" in str(e.value).lower() or "corrupt input at" in str(e.value).lower() or "decompression failed: corrupt input" in str(e.value).lower())
Пример #6
0
def test_exceptions():
    """Decompressing truncated lz4 data must raise a corruption error.

    Fix: assertions used ``str(e)`` on pytest's ExceptionInfo wrapper, which
    yields the wrapper's repr/location rather than the exception message;
    use ``str(e.value)`` to inspect the actual exception text.
    """
    data = c.compress(
        b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111'
    )
    data = data[0:16]
    with pytest.raises(Exception) as e:
        c.decompress(data)
    assert ("Decompressor wrote" in str(e.value) or "Corrupt input at" in str(e.value))

    data = c.compress(
        b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111'
    )
    data = [data[0:16] for x in (1, 2, 3)]
    with pytest.raises(Exception) as e:
        c.decompress_array(data)
    assert ("Decompressor wrote" in str(e.value) or "Corrupt input at" in str(e.value))
Пример #7
0
def test_compress_array_no_parallel():
    """Array round trip must hit the python lz4 module once per element."""
    items = [b'spam', b'egg', b'spamm', b'spammm']
    # The sentinel clz4 raises on any attribute access, so success proves the
    # serial python fallback handled every element.
    with patch('arctic._compression.clz4', sentinel.clz4):
        with patch('arctic._compression.ENABLE_PARALLEL', False):
            with patch('arctic._compression.lz4', wraps=lz4) as lz4_spy:
                assert decompress_array(compress_array(items)) == items
                assert lz4_spy.compress.call_args_list == [call(item) for item in items]
                assert lz4_spy.decompress.call_args_list == [call(compress(item)) for item in items]
Пример #8
0
def test_compress_array_no_parallel():
    """String-array round trip through the serial, pure-python lz4 path."""
    inputs = ["spam", "egg", "spamm", "spammm"]
    with patch("arctic._compression.clz4", sentinel.clz4), patch(
        "arctic._compression.ENABLE_PARALLEL", False
    ), patch("arctic._compression.lz4", wraps=lz4) as lz4_wrapper:
        # clz4 sentinel would raise if the C path were taken.
        roundtripped = decompress_array(compress_array(inputs))
        assert roundtripped == inputs
        expected_compress = [call(value) for value in inputs]
        assert lz4_wrapper.compress.call_args_list == expected_compress
        expected_decompress = [call(compress(value)) for value in inputs]
        assert lz4_wrapper.decompress.call_args_list == expected_decompress
Пример #9
0
def test_compress_array_no_parallel():
    """compress_array/decompress_array round trip, parallelism disabled."""
    data = [b'spam', b'egg', b'spamm', b'spammm']
    with patch('arctic._compression.clz4', sentinel.clz4), \
         patch('arctic._compression.ENABLE_PARALLEL', False), \
         patch('arctic._compression.lz4', wraps=lz4) as spy:
        # Sentinel clz4 ensures only the wrapped python lz4 is exercised.
        assert data == decompress_array(compress_array(data))
        assert [call(element) for element in data] == spy.compress.call_args_list
        assert [call(compress(element)) for element in data] == spy.decompress.call_args_list
Пример #10
0
def test_write_object():
    """PickleStore.write should upsert one compressed pickle segment keyed by sha."""
    arctic_lib = Mock()
    self = create_autospec(PickleStore)
    version = {'_id': ObjectId()}
    PickleStore.write(self, arctic_lib, version, 'sentinel.symbol',
                      sentinel.item, sentinel.previous_version)
    assert 'data' not in version

    assert version['blob'] == '__chunked__V2'
    coll = arctic_lib.get_top_level_collection.return_value
    # The pickled+compressed payload is deterministic, so compute it once.
    blob = compress(cPickle.dumps(sentinel.item, cPickle.HIGHEST_PROTOCOL))
    expected_filter = {
        'sha': checksum('sentinel.symbol', {'segment': 0, 'data': Binary(blob)}),
        'symbol': 'sentinel.symbol',
    }
    expected_update = {
        '$set': {'segment': 0, 'data': Binary(blob, 0)},
        '$addToSet': {'parent': version['_id']},
    }
    assert coll.update_one.call_args_list == [
        call(expected_filter, expected_update, upsert=True)
    ]
Пример #11
0
def bench_single(repeats, _strarr, use_HC):
    """Time `repeats` passes of arctic's single-value compression.

    Returns a list of per-pass durations in seconds. `use_HC` selects the
    high-compression variant.
    """
    timings = []
    for _ in range(repeats):
        started = dt.now()
        if use_HC:
            compressed = [c.compressHC(item) for item in _strarr]
        else:
            compressed = [c.compress(item) for item in _strarr]
        elapsed = (dt.now() - started).total_seconds()
        # Sanity check: every element must have produced non-empty output.
        assert all(compressed)
        timings.append(elapsed)
    return timings
Пример #12
0
def bench_single(repeats, _strarr, use_HC):
    # Benchmark arctic's one-shot compression; returns per-pass seconds.
    samples = []
    remaining = repeats
    while remaining > 0:
        t0 = dt.now()
        if use_HC:
            out = [c.compressHC(element) for element in _strarr]
        else:
            out = [c.compress(element) for element in _strarr]
        took = (dt.now() - t0).total_seconds()
        assert all(out)  # every compression must yield a truthy result
        samples.append(took)
        remaining -= 1
    return samples
Пример #13
0
    def _segment_index(self, recarr, existing_index, start, new_segments):
        """
        Generate index of datetime64 -> item offset.

        Parameters:
        -----------
        recarr: new data being written (or appended)
        existing_index: index field from the versions document of the previous version
        start: first (0-based) offset of the new data
        new_segments: list of offsets. Each offset is the row index of the
                  last row of a particular chunk relative to the start of the _original_ item.
                  array(new_data) - segments = array(offsets in item)

        Returns:
        --------
        Binary(compress(array([(index, datetime)]))
            Where index is the 0-based index of the datetime in the DataFrame

        Raises:
        -------
        ArcticException if the previous version had a datetime64 index but the
        new data has none.
        """
        # find the index of the first datetime64 column
        idx_col = self._datetime64_index(recarr)
        # if one exists let's create the index on it
        if idx_col is not None:
            new_segments = np.array(new_segments, dtype='i8')
            # last row of each chunk, addressed relative to this write's start
            last_rows = recarr[new_segments - start]
            # create numpy index pairing each chunk's datetime with its offset
            index = np.core.records.fromarrays([last_rows[idx_col]] + [
                new_segments,
            ],
                                               dtype=INDEX_DTYPE)
            # append to existing index if exists
            if existing_index:
                # existing_index_arr is read-only but it's never written to
                existing_index_arr = np.frombuffer(decompress(existing_index),
                                                   dtype=INDEX_DTYPE)
                if start > 0:
                    # drop prior entries at/after the append point; the new
                    # data supersedes them
                    existing_index_arr = existing_index_arr[
                        existing_index_arr['index'] < start]
                index = np.concatenate((existing_index_arr, index))
            # tobytes() is the exact, supported alias of the deprecated
            # tostring() (which was removed in numpy 2.0)
            return Binary(compress(index.tobytes()))
        elif existing_index:
            raise ArcticException(
                "Could not find datetime64 index in item but existing data contains one"
            )
        return None
Пример #14
0
def test_compress_empty_string():
    # An empty payload must survive the round trip untouched.
    roundtrip = decompress(compress(b''))
    assert roundtrip == b''
Пример #15
0
def test_compress():
    # Compressing a non-empty string must yield non-empty output.
    compressed = compress("foobar")
    assert len(compressed) > 0
Пример #16
0
def test_decompress():
    """A compressed bytes payload decompresses back to the original."""
    original = b"foo"
    assert decompress(compress(original)) == original
Пример #17
0
def test_compress():
    # Non-empty bytes input should produce non-empty compressed output.
    result = compress(b'foobar')
    assert len(result) > 0
Пример #18
0
def test_compress():
    """compress of a non-empty string yields a non-empty result."""
    assert len(compress("foobar")) != 0
Пример #19
0
def test_compress_LZ4():
    # compress() must delegate exactly once to the C lz4 implementation.
    spy = Mock()
    with patch('arctic._compression.clz4.compress', spy):
        compress(b"foo")
    assert spy.call_count == 1
Пример #20
0
def test_compress_empty_string():
    """Empty bytes round-trip to empty bytes."""
    empty = b''
    assert decompress(compress(empty)) == empty
Пример #21
0
def test_decompress():
    # Round trip a small string payload.
    payload = "foo"
    restored = decompress(compress(payload))
    assert restored == payload
Пример #22
0
def test_write_object():
    """Writing a pickled object stores a single compressed chunk via upsert."""
    arctic_lib = Mock()
    self = create_autospec(PickleStore)
    version = {'_id': ObjectId()}
    PickleStore.write(self, arctic_lib, version, 'sentinel.symbol', sentinel.item, sentinel.previous_version)
    assert 'data' not in version

    assert version['blob'] == '__chunked__V2'
    coll = arctic_lib.get_top_level_collection.return_value
    # Pickling and compression are deterministic, so build the payload once.
    payload = compress(cPickle.dumps(sentinel.item, cPickle.HIGHEST_PROTOCOL))
    sha = checksum('sentinel.symbol', {'segment': 0, 'data': Binary(payload)})
    expected = call({'sha': sha, 'symbol': 'sentinel.symbol'},
                    {'$set': {'segment': 0, 'data': Binary(payload, 0)},
                     '$addToSet': {'parent': version['_id']}},
                    upsert=True)
    assert coll.update_one.call_args_list == [expected]
Пример #23
0
def test_roundtrip_multi(n):
    # A random payload of length n must survive compress/decompress unchanged.
    payload = random_string(n)
    assert c.decompress(c.compress(payload)) == payload
Пример #24
0
def test_compress_LZ4HC():
    """With HC mode enabled, compress() must route to clz4.compressHC once."""
    use_lz4hc(True)
    spy = Mock()
    with patch('arctic._compression.clz4.compressHC', spy):
        compress("foo")
    assert spy.call_count == 1
Пример #25
0
def test_compress_LZ4():
    # compress() should invoke the module-level lz4_compress exactly once.
    mock_compress = Mock()
    with patch('arctic._compression.lz4_compress', mock_compress):
        compress(b"foo")
        assert 1 == mock_compress.call_count
Пример #26
0
def test_compress():
    """compress must return a non-empty result for non-empty bytes input."""
    assert len(compress(b'foobar')) >= 1
Пример #27
0
def test_compress_LZ4():
    """With HC mode disabled, compress() must route to clz4.compress once."""
    use_lz4hc(False)
    counter = Mock()
    with patch('arctic._compression.clz4.compress', counter):
        compress("foo")
    assert counter.call_count == 1
Пример #28
0
def test_roundtrip_multi(n):
    """Compressing then decompressing a length-n random string is lossless."""
    original = random_string(n)
    compressed = c.compress(original)
    restored = c.decompress(compressed)
    assert restored == original