Example No. 1
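These snippets come from arctic's integration test suite and assume a shared preamble of imports plus pytest fixtures (library, mongo_host, data, dry_run, fw_pointers_config / fw_pointers_cfg). A minimal sketch of the likely imports; the exact module paths, and the FwPointersCtx helper, are assumptions that can differ between arctic versions:

from datetime import datetime as dt, timedelta as dtd

import bson
import numpy as np
from mock import patch  # or: from unittest.mock import patch

from arctic._config import FwPointersCfg, FW_POINTERS_REFS_KEY  # forward-pointer modes and refs key
from arctic._util import mongo_count
from arctic.scripts.arctic_fsck import main  # the cleanup entry point driven via run_as_main
from arctic.store._ndarray_store import _APPEND_COUNT
from tests.util import run_as_main  # test-suite helper, not part of the public API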
def test_dont_cleanup_recent_orphaned_snapshots(mongo_host, library, data,
                                                dry_run, fw_pointers_config):
    """
    Check that recently orphaned snapshots are never cleaned up, whether dry-run or forced
    """
    with FwPointersCtx(fw_pointers_config):
        today = dt.utcnow() - dtd(hours=12, seconds=1)
        _id = bson.ObjectId.from_datetime(today)
        library.write('symbol', data, prune_previous_version=False)
        with patch("bson.ObjectId", return_value=_id):
            library.snapshot('snap_name')

        # Remove the snapshot documents; the snapshot references held by the versions are now dangling
        assert library._collection.snapshots.delete_many({})

        # No cleanup on dry-run
        if dry_run:
            run_as_main(main, '--library', 'user.library', '--host',
                        mongo_host)
            assert mongo_count(library._collection) > 0
            assert mongo_count(library._collection.versions)
            assert repr(library.read('symbol').data) == repr(data)
            # Nothing done
            assert len(library._collection.versions.find_one({})['parent'])
        else:
            run_as_main(main, '--library', 'user.library', '--host',
                        mongo_host, '-f')
            assert mongo_count(library._collection) > 0
            assert mongo_count(library._collection.versions)
            # Data still available (write with prune_previous_version will do the cleanup)
            assert repr(library.read('symbol').data) == repr(data)
            # Snapshot references not cleaned up - they are too recent
            assert len(library._collection.versions.find_one({})['parent'])
Example No. 2
def test_cleanup_orphaned_chunks(mongo_host, library, data, dry_run,
                                 fw_pointers_config):
    """
    Check that we do / don't clean up orphaned chunks depending on the dry-run flag
    """
    with FwPointersCtx(fw_pointers_config):
        yesterday = dt.utcnow() - dtd(days=1, seconds=1)
        _id = bson.ObjectId.from_datetime(yesterday)
        with patch("bson.ObjectId", return_value=_id):
            library.write('symbol', data, prune_previous_version=False)

        # Number of chunks
        chunk_count = mongo_count(library._collection)
        # Remove the version document; its chunks are now orphaned and eligible for cleanup
        library._collection.versions.delete_one({'_id': _id})

        # No cleanup on dry-run
        if dry_run:
            run_as_main(main, '--library', 'user.library', '--host',
                        mongo_host)
            assert mongo_count(library._collection) == chunk_count
        else:
            run_as_main(main, '--library', 'user.library', '--host',
                        mongo_host, '-f')
            assert mongo_count(library._collection) == 0
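run_as_main drives the cleanup script (shipped as the arctic_fsck console entry point) as though it were invoked from the shell. A minimal sketch of such a helper, assuming it does nothing more than patch sys.argv; the real one lives in arctic's test utilities:

import sys
from mock import patch

def run_as_main(fn, *args):
    # Run fn as if it were the program entry point, with the given CLI arguments.
    with patch.object(sys, 'argv', ['progname'] + list(args)):
        return fn()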
Example No. 3
def test_save_and_resave_reuses_chunks(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        with patch('arctic.store._ndarray_store._CHUNK_SIZE', 1000):
            ndarr = np.random.rand(1024)
            library.write('MYARR', ndarr)
            saved_arr = library.read('MYARR').data
            assert np.all(ndarr == saved_arr)
            orig_chunks = mongo_count(library._collection)
            assert orig_chunks == 9

            # Concatenate more values
            ndarr = np.concatenate([ndarr, np.random.rand(10)])
            # And change the original values - we're not a simple append
            ndarr[0] = ndarr[1] = ndarr[2] = 0
            library.write('MYARR', ndarr)
            saved_arr = library.read('MYARR').data
            assert np.all(ndarr == saved_arr)

            # Should contain the original chunks, but not double the number
            # of chunks
            new_chunks = mongo_count(library._collection)
            assert new_chunks == 11

            if fw_pointers_cfg in (FwPointersCfg.DISABLED, FwPointersCfg.HYBRID):
                # We hit the update (rather than upsert) code path
                assert mongo_count(library._collection, filter={'parent': {'$size': 2}}) == 7

            if fw_pointers_cfg in (FwPointersCfg.HYBRID, FwPointersCfg.ENABLED):
                assert len(library._versions.find_one({'symbol': 'MYARR', 'version': 2})[FW_POINTERS_REFS_KEY]) == 9
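The chunk counts follow from the patched chunk size. Assuming _CHUNK_SIZE is a byte budget, 1000 bytes holds 125 float64 values (8 bytes each), so the 1024-value array needs ceil(1024 / 125) = 9 segments. The resave mutates the first segment and grows the last one, so 7 of the original 9 segments are shared by both versions (hence the 'parent' arrays of size 2), and the 2 rewritten segments bring the collection to 9 + 2 = 11 documents.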
Example No. 4
def test_cleanup_orphaned_chunk_doesnt_break_versions(mongo_host, library,
                                                      data,
                                                      fw_pointers_config):
    """
    Check that chunks pointed to by more than one version aren't inadvertently cleared
    """
    with FwPointersCtx(fw_pointers_config):
        yesterday = dt.utcnow() - dtd(days=1, seconds=1)
        _id = bson.ObjectId.from_datetime(yesterday)
        with patch("bson.ObjectId", return_value=_id):
            library.write('symbol', data, prune_previous_version=False)

        # Re-write the data
        # Write a whole new version rather than going down the append path...
        #     - we want two self-standing versions, the removal of one shouldn't break the other...
        with patch('arctic.store._ndarray_store._APPEND_COUNT', 0):
            library.write('symbol', data, prune_previous_version=False)
        library._delete_version('symbol', 1)
        library._collection.versions.delete_one({'_id': _id})
        assert repr(library.read('symbol').data) == repr(data)

        run_as_main(main, '--library', 'user.library', '--host', mongo_host,
                    '-f')
        assert repr(library.read('symbol').data) == repr(data)
        library.delete('symbol')
        assert mongo_count(library._collection.versions) == 0
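The safety property being exercised: each segment document carries a 'parent' list of the versions referencing it (visible in the {'$size': 2} query of Example No. 3), and forced cleanup only removes segments whose referencing versions are all gone. Deleting version 1 and its version document leaves version 2's segments with a live parent, so '-f' must leave them intact.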
Example No. 5
def test_empty_append_concat_and_rewrite_2(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        ndarr2 = np.array(["a", "b", "c"])
        library.write('MYARR', ndarr2)
        for _ in range(_APPEND_COUNT + 1):
            library.append('MYARR', ndarr2)
        saved_arr = library.read('MYARR').data
        assert np.all(saved_arr == np.hstack([ndarr2] * (_APPEND_COUNT + 2)))
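One initial write plus _APPEND_COUNT + 1 appends leaves _APPEND_COUNT + 2 copies of ndarr2, which is what the final hstack reconstructs; pushing the append count past _APPEND_COUNT is what forces the concat-and-rewrite path the test name refers to.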
Example No. 6
def test_empty_append_promotes_dtype(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        ndarr = np.array(["a", "b", "c"])
        ndarr2 = np.array([])
        library.write('MYARR', ndarr)
        library.append('MYARR', ndarr2)
        saved_arr = library.read('MYARR').data
        assert np.all(saved_arr == ndarr)
Example No. 7
def test_append_simple_ndarray_promoting_types(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        ndarr = np.ones(100, dtype='int64')
        library.write('MYARR', ndarr)
        library.append('MYARR', np.ones(100, dtype='float64'))
        library.append('MYARR', np.ones(100, dtype='int64'))
        library.append('MYARR', np.ones(205, dtype='float64'))
        saved_arr = library.read('MYARR').data
        assert np.all(np.ones(505, dtype='float64') == saved_arr)
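The expected result follows NumPy's promotion rules: 100 + 100 + 100 + 205 = 505 elements, and any mix of int64 and float64 widens to float64, as a quick check confirms:

import numpy as np

# Promotion is symmetric: appending floats to ints (or vice versa) yields float64.
assert np.promote_types('int64', 'float64') == np.dtype('float64')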
Example No. 8
def test_multiple_write(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        ndarr = np.empty(1000, dtype=[('abc', 'int64')])
        foo = np.empty(900, dtype=[('abc', 'int64')])
        library.write('MYARR', foo)
        v1 = library.read('MYARR').version
        library.write('MYARR', ndarr[:900])
        v2 = library.read('MYARR').version
        library.append('MYARR', ndarr[-100:])
        v3 = library.read('MYARR').version

        assert np.all(ndarr == library.read('MYARR').data)
        assert np.all(ndarr == library.read('MYARR', as_of=v3).data)
        assert np.all(foo == library.read('MYARR', as_of=v1).data)
        assert np.all(ndarr[:900] == library.read('MYARR', as_of=v2).data)
Example No. 9
def test_append_ndarray_with_field_shape(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        ndarr = np.empty(10, dtype=[('A', 'int64'), ('B', 'float64', (2,))])
        ndarr['A'] = 1
        ndarr['B'] = 2
        ndarr2 = np.empty(10, dtype=[('A', 'int64'), ('B', 'int64', (2,))])
        ndarr2['A'] = 1
        ndarr2['B'] = 2

        library.write('MYARR', ndarr)
        library.append('MYARR', ndarr2)
        saved_arr = library.read('MYARR').data
        ndarr3 = np.empty(20, dtype=[('A', 'int64'), ('B', 'float64', (2,))])
        ndarr3['A'] = 1
        ndarr3['B'] = 2
        assert np.all(ndarr3 == saved_arr)
Example No. 10
def test_save_append_read_ndarray(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        dtype = np.dtype([('abc', 'int64')])
        # // keeps the element count an int under Python 3
        ndarr = np.arange(30 * 1024 * 1024 // dtype.itemsize).view(dtype=dtype)
        assert len(ndarr.tobytes()) > 16 * 1024 * 1024
        library.write('MYARR', ndarr)

        sliver = np.arange(30).view(dtype=dtype)
        library.append('MYARR', sliver)

        saved_arr = library.read('MYARR').data
        assert np.all(np.concatenate([ndarr, sliver]) == saved_arr)

        library.append('MYARR', sliver)
        saved_arr = library.read('MYARR').data
        assert np.all(np.concatenate([ndarr, sliver, sliver]) == saved_arr)
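The length assert guarantees the 30 MB array exceeds MongoDB's 16 MB BSON document limit, so the store has no choice but to split it across multiple segment documents; the appends then round-trip against a genuinely multi-chunk symbol.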
Example No. 11
def test_append_after_failed_append(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        dtype = np.dtype([('abc', 'int64')])
        ndarr = np.arange(30 // dtype.itemsize).view(dtype=dtype)

        v1 = library.write('MYARR', ndarr)

        sliver = np.arange(3, 4).view(dtype=dtype)
        v2 = library.append('MYARR', sliver)

        # simulate a failed append - intentionally leave an orphaned chunk lying around here
        library._delete_version('MYARR', v2.version, do_cleanup=False)

        sliver2 = np.arange(3, 5).view(dtype=dtype)
        v3 = library.append('MYARR', sliver2)

        assert np.all(ndarr == library.read('MYARR', as_of=v1.version).data)
        assert np.all(np.concatenate([ndarr, sliver2]) == library.read('MYARR', as_of=v3.version).data)
Example No. 12
def test_cleanup_orphaned_chunks_ignores_recent(mongo_host, library, data, dry_run, fw_pointers_config):
    """
    We don't clean up any chunks written within the last day; that's just asking for trouble
    """
    with FwPointersCtx(fw_pointers_config):
        recent = dt.utcnow() - dtd(hours=12)
        _id = bson.ObjectId.from_datetime(recent)
        with patch("bson.ObjectId", return_value=_id):
            library.write('symbol', data, prune_previous_version=False)
        chunk_count = mongo_count(library._collection)
        library._collection.versions.delete_one({'_id': _id})

        if dry_run:
            run_as_main(main, '--library', 'user.library', '--host', mongo_host)
            assert mongo_count(library._collection) == chunk_count
        else:
            run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
            assert mongo_count(library._collection) == chunk_count
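Contrast with test_cleanup_orphaned_chunks above: the orphaned version here is back-dated only 12 hours, inside the script's "today" safety window, so the chunk count is unchanged whether or not '-f' is passed (the companion test has to back-date a full day before cleanup occurs).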
Example No. 13
def test_save_append_delete_append(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        dtype = np.dtype([('abc', 'int64')])
        ndarr = np.arange(30 // dtype.itemsize).view(dtype=dtype)
        v1 = library.write('MYARR', ndarr)

        sliver = np.arange(30).view(dtype=dtype)
        v2 = library.append('MYARR', sliver)

        # intentionally leave an orphaned chunk lying around here
        library._delete_version('MYARR', v2.version, do_cleanup=False)

        sliver2 = np.arange(start=10, stop=40).view(dtype=dtype)
        # we can't append here, as the latest version is now out of sync with version_nums.
        # This gets translated to a do_append by the handler anyway.
        v3 = library.write('MYARR', np.concatenate([ndarr, sliver2]))

        assert np.all(ndarr == library.read('MYARR', as_of=v1.version).data)

        # Check that we don't get the orphaned chunk from v2 back again.
        assert np.all(np.concatenate([ndarr, sliver2]) == library.read('MYARR', as_of=v3.version).data)
Example No. 14
def test_cleanup_noop(mongo_host, library, data, dry_run, fw_pointers_config):
    """
    Check that cleanup is a no-op when nothing has been orphaned, with or without --force
    """
    with FwPointersCtx(fw_pointers_config):
        yesterday = dt.utcnow() - dtd(days=1, seconds=1)
        _id = bson.ObjectId.from_datetime(yesterday)
        with patch("bson.ObjectId", return_value=_id):
            library.write('symbol', data, prune_previous_version=False)

        # Number of chunks
        chunk_count = mongo_count(library._collection)

        # No cleanup on dry-run
        if dry_run:
            run_as_main(main, '--library', 'user.library', '--host', mongo_host)
            assert mongo_count(library._collection) == chunk_count
            assert repr(library.read('symbol').data) == repr(data)
        else:
            run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
            assert mongo_count(library._collection) == chunk_count
            assert repr(library.read('symbol').data) == repr(data)
Example No. 15
def test_append_read_large_ndarray(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        dtype = np.dtype([('abc', 'int64')])
        ndarr = np.arange(50 * 1024 * 1024 // dtype.itemsize).view(dtype=dtype)
        assert len(ndarr.tobytes()) > 16 * 1024 * 1024
        library.write('MYARR1', ndarr)
        # Exactly enough appends to trigger 2 re-compacts, so the result should be identical
        # to writing the whole array at once
        ndarr2 = np.arange(240).view(dtype=dtype)
        for n in np.split(ndarr2, 120):
            library.append('MYARR1', n)

        saved_arr = library.read('MYARR1').data
        assert np.all(np.concatenate([ndarr, ndarr2]) == saved_arr)

        library.write('MYARR2', np.concatenate([ndarr, ndarr2]))

        version1 = library._read_metadata('MYARR1')
        version2 = library._read_metadata('MYARR2')
        assert version1['append_count'] == version2['append_count']
        assert version1['append_size'] == version2['append_size']
        assert version1['segment_count'] == version2['segment_count']
        assert version1['up_to'] == version2['up_to']
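The "exactly enough appends" claim rests on the store's append threshold: assuming the default _APPEND_COUNT of 60, the 120 single-row appends cross it twice, so the data is concatenated and rewritten twice, leaving append_count, append_size, segment_count and up_to identical to those of a single bulk write.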
Example No. 16
def test_save_read_big_1darray(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        ndarr = np.random.rand(5326, 6020).ravel()
        library.write('MYARR', ndarr)
        saved_arr = library.read('MYARR').data
        assert np.all(ndarr == saved_arr)
Example No. 17
def test_save_read_massive_2darray(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        ndarr = np.random.rand(1, 320000)
        library.write('MYARR', ndarr)
        saved_arr = library.read('MYARR').data
        assert np.all(ndarr == saved_arr)