Example #1
def setup_and_teardown(app):
    """
    Run create mapping and purge queue before tests and clear out the
    DB tables after the test
    """
    import transaction
    from sqlalchemy import MetaData
    from zope.sqlalchemy import mark_changed
    # BEFORE THE TEST - just run CM for the TEST_TYPE by default
    create_mapping.run(app, collections=[TEST_TYPE], skip_indexing=True)
    app.registry[INDEXER_QUEUE].clear_queue()

    yield  # run the test

    # AFTER THE TEST
    session = app.registry[DBSESSION]
    connection = session.connection().connect()
    meta = MetaData(bind=session.connection(), reflect=True)
    for table in meta.sorted_tables:
        print('Clear table %s' % table)
        print('Count before -->', str(connection.scalar("SELECT COUNT(*) FROM %s" % table)))
        connection.execute(table.delete())
        print('Count after -->', str(connection.scalar("SELECT COUNT(*) FROM %s" % table)), '\n')
    session.flush()
    mark_changed(session())
    transaction.commit()
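
Note that setup_and_teardown is a generator: everything before the yield runs as setup and everything after it runs as teardown. In the original test suite this function is almost certainly registered as a pytest fixture; the decorator was stripped when the snippet was extracted. A minimal, self-contained sketch of that wiring, using hypothetical helper names (prepare_indices, truncate_tables) rather than the real snovault code, and assuming autouse registration:

import pytest

def prepare_indices(app):
    # hypothetical stand-in for the create_mapping.run(...) + clear_queue() setup above
    pass

def truncate_tables(app):
    # hypothetical stand-in for the table-clearing loop above
    pass

@pytest.fixture(autouse=True)
def setup_and_teardown(app):
    prepare_indices(app)   # runs before each test
    yield                  # the test body runs here
    truncate_tables(app)   # runs after each test, even if it failed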
Example #2
def test_create_mapping_index_diff(app, testapp, indexer_testapp):
    from snovault.elasticsearch import create_mapping
    es = app.registry[ELASTIC_SEARCH]
    # post a couple items, index, then remove one
    res = testapp.post_json(TEST_COLL, {'required': ''})
    test_uuid = res.json['@graph'][0]['uuid']
    testapp.post_json(TEST_COLL, {'required': ''})  # second item
    create_mapping.run(app, collections=[TEST_TYPE], purge_queue=True)
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(4)
    initial_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    assert initial_count == 2

    # remove one item
    es.delete(index=TEST_TYPE, doc_type=TEST_TYPE, id=test_uuid)
    time.sleep(8)
    second_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    assert second_count == 1

    # patch the item to increment version
    res = testapp.patch_json(TEST_COLL + test_uuid, {'required': 'meh'})
    # index with index_diff to ensure the item is reindexed
    create_mapping.run(app, collections=[TEST_TYPE], index_diff=True)
    res = indexer_testapp.post_json('/index', {'record': True})
    time.sleep(4)
    third_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    assert third_count == initial_count
Example #3
def test_es_indices(app, elasticsearch):
    """
    Test overall create_mapping functionality using app.
    Do this by checking es directly before and after running mapping.
    Delete an index directly, run again to see if it recovers.
    """
    es = app.registry[ELASTIC_SEARCH]
    item_types = app.registry[TYPES].by_item_type
    test_collections = [TEST_TYPE]
    # run create mapping for all types, but no need to index
    run(app, collections=test_collections, skip_indexing=True)
    # check that mappings and settings are in index
    for item_type in test_collections:
        item_mapping = type_mapping(app.registry[TYPES], item_type)
        try:
            item_index = es.indices.get(index=item_type)
        except:
            assert False
        found_index_mapping = item_index.get(item_type, {}).get('mappings').get(item_type, {}).get('properties', {}).get('embedded')
        found_index_settings = item_index.get(item_type, {}).get('settings')
        assert found_index_mapping
        assert found_index_settings
        # get the item record from meta and compare that
        full_mapping = create_mapping_by_type(item_type, app.registry)
        item_record = build_index_record(full_mapping, item_type)
        try:
            item_meta = es.get(index='meta', doc_type='meta', id=item_type)
        except:
            assert False
        meta_record = item_meta.get('_source', None)
        assert meta_record
        assert item_record == meta_record
Example #4
def setup_and_teardown(app):
    """
    Run create mapping and purge queue before tests and clear out the
    DB tables after the test
    """

    # BEFORE THE TEST - run create mapping for test types and clear queues
    create_mapping.run(app, collections=TEST_COLLECTIONS, skip_indexing=True)
    app.registry[INDEXER_QUEUE].clear_queue()

    yield  # run the test

    # AFTER THE TEST
    session = app.registry[DBSESSION]
    connection = session.connection().connect()
    meta = MetaData(bind=session.connection())
    meta.reflect()
    for table in meta.sorted_tables:
        print('Clear table %s' % table)
        print('Count before -->',
              str(connection.scalar("SELECT COUNT(*) FROM %s" % table)))
        connection.execute(table.delete())
        print('Count after -->',
              str(connection.scalar("SELECT COUNT(*) FROM %s" % table)), '\n')
    session.flush()
    mark_changed(session())
    transaction.commit()
Example #5
def test_sync_and_queue_indexing(app, testapp, indexer_testapp):
    es = app.registry[ELASTIC_SEARCH]
    indexer_queue = app.registry[INDEXER_QUEUE]
    # clear queue before starting this one
    indexer_queue.clear_queue()
    # queued on post - total of one item queued
    res = testapp.post_json(TEST_COLL, {'required': ''})
    # synchronously index
    create_mapping.run(app, collections=[TEST_TYPE], sync_index=True)
    #time.sleep(6)
    doc_count = tries = 0
    while(tries < 6):
        doc_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
        if doc_count != 0:
            break
        time.sleep(1)
        tries += 1
    assert doc_count == 1
    # post second item to database but do not index (don't load into es)
    # queued on post - total of two items queued
    res = testapp.post_json(TEST_COLL, {'required': ''})
    #time.sleep(2)
    doc_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    # doc_count has not yet updated
    assert doc_count == 1
    # clear the queue by indexing and then run create mapping to queue all items
    res = indexer_testapp.post_json('/index', {'record': True})
    assert res.json['indexing_count'] == 2
    create_mapping.run(app, collections=[TEST_TYPE])
    res = indexer_testapp.post_json('/index', {'record': True})
    assert res.json['indexing_count'] == 2
    time.sleep(4)
    doc_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    assert doc_count == 2
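
Instead of relying only on a fixed time.sleep, the example above polls es.count until the document shows up. The same pattern recurs in several later examples; a small helper along these lines (illustrative only, not part of snovault, but using the same elasticsearch-py count call as the tests) factors it out:

import time

def wait_for_doc_count(es, index, expected, attempts=10, delay=1):
    """Poll es.count until `index` holds `expected` docs or attempts run out."""
    count = None
    for _ in range(attempts):
        count = es.count(index=index, doc_type=index).get('count')
        if count == expected:
            break
        time.sleep(delay)
    return count

# a test could then assert: wait_for_doc_count(es, TEST_TYPE, 1) == 1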
Example #6
def es_app(es_app_settings, **kwargs):
    """
    App that uses both Postgres and ES - pass this as "app" argument to TestApp.
    Pass all kwargs onto create_mapping
    """
    app = main({}, **es_app_settings)
    create_mapping.run(app, **kwargs)

    return app
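
The docstring above says to pass the returned app to TestApp. A minimal usage sketch, assuming webtest is installed; the Accept header and REMOTE_USER value are assumptions about the test environ, not taken from this snippet:

from webtest import TestApp

def make_testapp(es_app):
    environ = {
        'HTTP_ACCEPT': 'application/json',
        'REMOTE_USER': 'TEST',  # assumed admin test user
    }
    return TestApp(es_app, environ)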
Example #7
def test_indexing_info(app, testapp, indexer_testapp):
    """
    Test the information on indexing-info for a given uuid and make sure that
    it updates properly following indexing
    """
    # first, run create mapping with the indices we will use
    create_mapping.run(
        app,
        collections=['testing_link_target_sno', 'testing_link_source_sno'],
        skip_indexing=True
    )
    target1 = {'name': 't_one', 'uuid': str(uuid.uuid4())}
    target2 = {'name': 't_two', 'uuid': str(uuid.uuid4())}
    source = {
        'name': 'idx_source',
        'target': target1['uuid'],
        'uuid': str(uuid.uuid4()),
        'status': 'current',
    }
    testapp.post_json('/testing-link-targets-sno/', target1, status=201)
    testapp.post_json('/testing-link-targets-sno/', target2, status=201)
    testapp.post_json('/testing-link-sources-sno/', source, status=201)
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(2)
    # indexing-info fails without uuid query param
    idx_info_err = testapp.get('/indexing-info')
    assert idx_info_err.json['status'] == 'error'
    src_idx_info = testapp.get('/indexing-info?uuid=%s' % source['uuid'])
    assert src_idx_info.json['status'] == 'success'
    # up to date
    assert src_idx_info.json['sid_es'] == src_idx_info.json['sid_db']
    assert set(src_idx_info.json['uuids_invalidated']) == set([target1['uuid'], source['uuid']])
    # update without indexing; view should capture the changes but sid_es will not change
    testapp.patch_json('/testing-link-sources-sno/' + source['uuid'], {'target': target2['uuid']})
    src_idx_info2 = testapp.get('/indexing-info?uuid=%s' % source['uuid'])
    assert src_idx_info2.json['status'] == 'success'
    # es is now out of date, since not indexed yet
    assert src_idx_info2.json['sid_es'] < src_idx_info2.json['sid_db']
    # target1 will still be in invalidated uuids, since es has not updated
    assert set(src_idx_info2.json['uuids_invalidated']) == set([target1['uuid'], target2['uuid'], source['uuid']])
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(2)
    # after indexing, make sure sid_es is updated
    src_idx_info3 = testapp.get('/indexing-info?uuid=%s' % source['uuid'])
    assert src_idx_info3.json['status'] == 'success'
    assert src_idx_info3.json['sid_es'] == src_idx_info3.json['sid_db']
    # target1 has now been updated and removed from invalidated uuids
    assert set(src_idx_info3.json['uuids_invalidated']) == set([target2['uuid'], source['uuid']])
    # try the view without calculated embedded view
    src_idx_info4 = testapp.get('/indexing-info?uuid=%s&run=False' % source['uuid'])
    assert src_idx_info4.json['status'] == 'success'
    assert 'uuids_invalidated' not in src_idx_info4.json
    assert 'embedded_seconds' not in src_idx_info4.json
Example #8
def app(app_settings):
    from snowflakes import main
    from snovault.elasticsearch import create_mapping
    app = main({}, **app_settings)

    create_mapping.run(app, skip_indexing=True)
    yield app

    from snovault import DBSESSION
    DBSession = app.registry[DBSESSION]
    # Dispose connections so postgres can tear down.
    DBSession.bind.pool.dispose()
Example #9
def app(app_settings, **kwargs):
    """
    Pass all kwargs onto create_mapping
    """
    from encoded import main
    from snovault.elasticsearch import create_mapping
    app = main({}, **app_settings)
    create_mapping.run(app, **kwargs)

    yield app

    from snovault import DBSESSION
    DBSession = app.registry[DBSESSION]
    # Dispose connections so postgres can tear down.
    DBSession.bind.pool.dispose()
Example #10
def test_indexing_invalid_sid_linked_items(app, testapp, indexer_testapp):
    """
    Make sure that items sent to the deferred queue do not trigger indexing
    of secondary items
    """
    indexer_queue = app.registry[INDEXER_QUEUE]
    es = app.registry[ELASTIC_SEARCH]
    create_mapping.run(
        app,
        collections=['testing_link_target_sno', 'testing_link_source_sno'],
        skip_indexing=True
    )
    target1 = {'name': 't_one', 'uuid': str(uuid.uuid4())}
    source = {
        'name': 'idx_source',
        'target': target1['uuid'],
        'uuid': str(uuid.uuid4()),
        'status': 'current',
    }
    testapp.post_json('/testing-link-targets-sno/', target1, status=201)
    testapp.post_json('/testing-link-sources-sno/', source, status=201)
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(2)
    es_item = es.get(index='testing_link_target_sno', doc_type='testing_link_target_sno',
                     id=target1['uuid'])
    initial_version = es_item['_version']

    # now try to manually bump an invalid version for the queued item
    # expect it to be sent to the deferred queue.
    to_queue = {
        'uuid': target1['uuid'],
        'sid': initial_version + 2,
        'strict': False,
        'timestamp': datetime.utcnow().isoformat()
    }
    indexer_queue.send_messages([to_queue], target_queue='primary')
    # make sure nothing is in the secondary queue before calling /index
    received_secondary = indexer_queue.receive_messages(target_queue='secondary')
    assert len(received_secondary) == 0
    res = indexer_testapp.post_json('/index', {'record': True})
    time.sleep(4)
    assert res.json['indexing_count'] == 0
    # make sure nothing is in secondary queue after calling /index
    received_secondary = indexer_queue.receive_messages(target_queue='secondary')
    assert len(received_secondary) == 0
    received_deferred = indexer_queue.receive_messages(target_queue='deferred')
    assert len(received_deferred) == 1
    indexer_queue.delete_messages(received_deferred, target_queue='deferred')
Example #11
def app(app_settings):
    from snowflakes import main
    from snovault.elasticsearch import create_mapping
    app = main({}, **app_settings)

    create_mapping.run(app)
    yield app

    # Shutdown multiprocessing pool to close db conns.
    from snovault.elasticsearch import INDEXER
    app.registry[INDEXER].shutdown()

    from snovault import DBSESSION
    DBSession = app.registry[DBSESSION]
    # Dispose connections so postgres can tear down.
    DBSession.bind.pool.dispose()
Example #12
def test_index_data_workbook(app, workbook, testapp, indexer_testapp, htmltestapp):
    from snovault.elasticsearch import create_mapping
    es = app.registry['elasticsearch']
    # we need to reindex the collections to make sure numbers are correct
    # TODO: NAMESPACE - here, passed in list to create_mapping
    # turn off logging for a bit
    create_mapping.run(app, sync_index=True)
    # check counts and ensure they're equal
    testapp_counts = testapp.get('/counts')
    total_counts = testapp_counts.json['db_es_total']
    split_counts = total_counts.split()  # 2nd item is db counts, 4th is es
    assert(int(split_counts[1]) == int(split_counts[3]))
    for item_type in TYPE_LENGTH.keys():
        tries = 0
        item_len = None
        while item_len is None or (item_len != TYPE_LENGTH[item_type] and tries < 3):
            if item_len is not None:
                create_mapping.run(app, collections=[item_type], strict=True, sync_index=True)
                es.indices.refresh(index=item_type)
            item_len = es.count(index=item_type, doc_type=item_type).get('count')
            print('... ES COUNT: %s' % item_len)
            print('... TYPE COUNT: %s' % TYPE_LENGTH[item_type])
            tries += 1
        assert item_len == TYPE_LENGTH[item_type]
        if item_len > 0:
            res = testapp.get('/%s?limit=all' % item_type, status=[200, 301, 404])
            res = res.follow()
            for item_res in res.json.get('@graph', []):
                index_view_res = es.get(index=item_type, doc_type=item_type,
                                        id=item_res['uuid'])['_source']
                # make sure that the linked_uuids match the embedded data
                assert 'linked_uuids_embedded' in index_view_res
                assert 'embedded' in index_view_res
                found_uuids = recursively_find_uuids(index_view_res['embedded'], set())
                # all found uuids must be within the linked_uuids
                assert found_uuids <= set([link['uuid'] for link in index_view_res['linked_uuids_embedded']])
                # if uuids_rev_linking to me, make sure they show up in @@links
                if len(index_view_res.get('uuids_rev_linked_to_me', [])) > 0:
                    links_res = testapp.get('/' + item_res['uuid'] + '/@@links', status=200)
                    link_uuids = [lnk['uuid'] for lnk in links_res.json.get('uuids_linking_to')]
                    assert set(index_view_res['uuids_rev_linked_to_me']) <= set(link_uuids)
                # previously test_html_pages
                try:
                    html_res = htmltestapp.get(item_res['@id'])
                    assert html_res.body.startswith(b'<!DOCTYPE html>')
                except Exception as e:
                    pass
Example #13
def teardown(app, use_collections=TEST_COLLECTIONS):
    import transaction
    from sqlalchemy import MetaData
    from zope.sqlalchemy import mark_changed
    from snovault import DBSESSION
    from snovault.elasticsearch import create_mapping
    from .conftest import indexer_testapp
    # index and then run create mapping to clear things out
    indexer_testapp(app).post_json('/index', {'record': True})
    create_mapping.run(app, collections=use_collections, skip_indexing=True)
    session = app.registry[DBSESSION]
    connection = session.connection().connect()
    meta = MetaData(bind=session.connection(), reflect=True)
    for table in meta.sorted_tables:
        print('Clear table %s' % table)
        print('Count before -->', str(connection.scalar("SELECT COUNT(*) FROM %s" % table)))
        connection.execute(table.delete())
        print('Count after -->', str(connection.scalar("SELECT COUNT(*) FROM %s" % table)), '\n')
    session.flush()
    mark_changed(session())
    transaction.commit()
Example #14
def test_es_purge_uuid(app, testapp, indexer_testapp, session):
    indexer_queue = app.registry[INDEXER_QUEUE]
    es = app.registry[ELASTIC_SEARCH]
    ## Adding new test resource to DB
    storage = app.registry[STORAGE]
    test_body = {'required': '', 'simple1' : 'foo', 'simple2' : 'bar' }
    res = testapp.post_json(TEST_COLL, test_body)
    test_uuid = res.json['@graph'][0]['uuid']
    check = storage.get_by_uuid(test_uuid)

    assert str(check.uuid) == test_uuid

    # Then index it:
    create_mapping.run(app, collections=[TEST_TYPE], sync_index=True, purge_queue=True)
    time.sleep(4)

    ## Now ensure that we do have it in ES:
    try:
        es_item = es.get(index=TEST_TYPE, doc_type=TEST_TYPE, id=test_uuid)
    except:
        assert False
    item_uuid = es_item.get('_source', {}).get('uuid')
    assert item_uuid == test_uuid

    check_post_from_rdb = storage.write.get_by_uuid(test_uuid)
    assert check_post_from_rdb is not None

    assert es_item['_source']['embedded']['simple1'] == test_body['simple1']
    assert es_item['_source']['embedded']['simple2'] == test_body['simple2']

    # The actual delete
    storage.purge_uuid(test_uuid) # We can optionally pass in TEST_TYPE as well for better performance.

    check_post_from_rdb_2 = storage.write.get_by_uuid(test_uuid)

    assert check_post_from_rdb_2 is None

    time.sleep(5) # Allow time for ES API to send network request to ES server to perform delete.
    check_post_from_es_2 = es.get(index=TEST_TYPE, doc_type=TEST_TYPE, id=test_uuid, ignore=[404])
    assert check_post_from_es_2['found'] == False
Example #15
def test_create_mapping_check_first(app, testapp, indexer_testapp):
    # ensure create mapping has been run
    from snovault.elasticsearch import create_mapping
    es = app.registry[ELASTIC_SEARCH]
    # post an item and then index it
    testapp.post_json(TEST_COLL, {'required': ''})
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(4)
    initial_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    # make sure the meta entry is created
    assert es.get(index='meta', doc_type='meta', id=TEST_TYPE)

    # run with check_first but skip indexing. counts should still match because
    # the index wasn't removed
    create_mapping.run(app, check_first=True, collections=[TEST_TYPE], skip_indexing=True)
    time.sleep(4)
    assert es.get(index='meta', doc_type='meta', id=TEST_TYPE)
    second_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    counter = 0
    while (second_count != initial_count and counter < 10):
        time.sleep(2)
        second_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
        counter +=1
    assert second_count == initial_count

    # make sure the meta entry is still there
    assert es.get(index='meta', doc_type='meta', id=TEST_TYPE)

    # remove the index manually and do not index
    # should cause create_mapping w/ check_first to recreate
    es.delete(index='meta', doc_type='meta', id=TEST_TYPE)
    es.indices.delete(index=TEST_TYPE)
    create_mapping.run(app, collections=[TEST_TYPE], check_first=True, skip_indexing=True)
    third_count = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    assert third_count == 0
    # but the meta entry should be there
    assert es.get(index='meta', doc_type='meta', id=TEST_TYPE)
Example #16
def app(app_settings):
    from .. import test_indexing
    from snovault.elasticsearch import create_mapping
    for app in test_indexing.app(app_settings):
        create_mapping.run(app)
        yield app
Example #17
def test_aggregated_items(app, testapp, indexer_testapp):
    """
    Test that the item aggregation works, which only occurs when indexing
    is actually run. This test does the following:
    - Post a TestingLinkAggregateSno, which links to 2 TestingLinkTargetSno
    - Check aggregated-items view for the item; should be empty before indexing
    - Index and retrieve the TestingLinkAggregateSno from ES
    - Check that the aggregations worked correctly
    - Patch the TestingLinkAggregateSno to only 1 TestingLinkTargetSno, index
    - Ensure that the aggregated_items changed, checking ES
    - Ensure that duplicate aggregated_items are deduplicated
    - Check aggregated-items view; should now match ES results
    """
    import webtest
    es = app.registry[ELASTIC_SEARCH]
    indexer_queue = app.registry[INDEXER_QUEUE]
    # first, run create mapping with the indices we will use
    create_mapping.run(
        app,
        collections=['testing_link_target_sno', 'testing_link_aggregate_sno'],
        skip_indexing=True
    )
    # generate a uuid for the aggregate item
    agg_res_uuid = str(uuid.uuid4())
    target1  = {'name': 'one', 'uuid': '775795d3-4410-4114-836b-8eeecf1d0c2f'}
    target2  = {'name': 'two', 'uuid': '775795d3-4410-4114-836b-8eeecf1daabc'}
    aggregated = {
        'name': 'A',
        'targets': [
            {
                'test_description': 'target one',
                'target': '775795d3-4410-4114-836b-8eeecf1d0c2f'
            },
            {
                'test_description': 'target two',
                'target': '775795d3-4410-4114-836b-8eeecf1daabc'
            }
        ],
        'uuid': agg_res_uuid,
        'status': 'current'
    }
    # you can do stuff like this and it will take effect
    # app.registry['types']['testing_link_aggregate_sno'].aggregated_items['targets'] = ['target.name', 'test_description']
    target1_res = testapp.post_json('/testing-link-targets-sno/', target1, status=201)
    target2_res = testapp.post_json('/testing-link-targets-sno/', target2, status=201)
    agg_res = testapp.post_json('/testing-link-aggregates-sno/', aggregated, status=201)
    agg_res_atid = agg_res.json['@graph'][0]['@id']
    # ensure that aggregated-items view shows nothing before indexing
    pre_agg_view = testapp.get(agg_res_atid + '@@aggregated-items', status=200).json
    assert pre_agg_view['@id'] == agg_res_atid
    assert pre_agg_view['aggregated_items'] == {}
    # wait for the items to index
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(2)
    # wait for test-link-aggregated item to index
    doc_count = es.count(index='testing_link_aggregate_sno', doc_type='testing_link_aggregate_sno').get('count')
    tries = 0
    while doc_count < 1 and tries < 5:
        time.sleep(2)
        doc_count = es.count(index='testing_link_aggregate_sno', doc_type='testing_link_aggregate_sno').get('count')
        tries += 1
    assert doc_count == 1
    es_agg_res = es.get(index='testing_link_aggregate_sno', doc_type='testing_link_aggregate_sno', id=agg_res_uuid)
    assert 'aggregated_items' in es_agg_res['_source']
    es_agg_items = es_agg_res['_source']['aggregated_items']
    assert 'targets' in es_agg_items
    assert len(es_agg_items['targets']) == 2
    for idx, target_agg in enumerate(es_agg_items['targets']):
        # order of targets should be maintained
        assert target_agg['parent'] == agg_res.json['@graph'][0]['@id']
        assert target_agg['embedded_path'] == 'targets'
        if idx == 0:
            assert target_agg['item']['test_description'] == 'target one'
            assert target_agg['item']['target']['uuid'] == target1['uuid']
        else:
            assert target_agg['item']['test_description'] == 'target two'
            assert target_agg['item']['target']['uuid'] == target2['uuid']
    # now make sure they get updated on a patch
    # use duplicate items, which should be deduplicated
    testapp.patch_json('/testing-link-aggregates-sno/' + aggregated['uuid'],
                       {'targets': [
                           {'test_description': 'target one revised',
                            'target': '775795d3-4410-4114-836b-8eeecf1d0c2f'},
                           {'test_description': 'target one revised',
                            'target': '775795d3-4410-4114-836b-8eeecf1d0c2f'},
                        ]})
    indexer_testapp.post_json('/index', {'record': True})
    time.sleep(10)  # be lazy and just wait a bit
    es_agg_res = es.get(index='testing_link_aggregate_sno', doc_type='testing_link_aggregate_sno', id=agg_res_uuid)
    assert 'aggregated_items' in es_agg_res['_source']
    es_agg_items = es_agg_res['_source']['aggregated_items']
    assert 'targets' in es_agg_items
    assert len(es_agg_items['targets']) == 1
    assert es_agg_items['targets'][0]['item']['test_description'] == 'target one revised'
    # check that the aggregated-items view now works
    post_agg_view = testapp.get(agg_res_atid + '@@aggregated-items', status=200).json
    assert post_agg_view['@id'] == agg_res_atid
    assert post_agg_view['aggregated_items'] == es_agg_res['_source']['aggregated_items']
    # clean up the test items
    testapp.patch_json('/testing-link-aggregates-sno/' + aggregated['uuid'],
                       {'targets': []})
    indexer_testapp.post_json('/index', {'record': True})
Example #18
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description="Run development servers", epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--app-name', help="Pyramid app name in configfile")
    parser.add_argument('config_uri', help="path to configfile")
    parser.add_argument('--clear', action="store_true", help="Clear existing data")
    parser.add_argument('--init', action="store_true", help="Init database")
    parser.add_argument('--load', action="store_true", help="Load test set")
    parser.add_argument('--datadir', default='/tmp/snovault', help="path to datadir")
    parser.add_argument('--access-key',
                        help="store local or copy to s3, will generate and store access key for admin user", default=None)
    args = parser.parse_args()

    logging.basicConfig(format='')
    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('snowvault').setLevel(logging.DEBUG)

    # get the config and see if we want to connect to non-local servers
    config = get_appsettings(args.config_uri, args.app_name)

    from snovault.tests import elasticsearch_fixture, postgresql_fixture
    from snovault.elasticsearch import create_mapping
    datadir = os.path.abspath(args.datadir)
    pgdata = os.path.join(datadir, 'pgdata')
    esdata = os.path.join(datadir, 'esdata')
    ### comment out from HERE...
    if args.clear:
        for dirname in [pgdata, esdata]:
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
    if args.init:
        postgresql_fixture.initdb(pgdata, echo=True)
    ### ... to HERE to disable recreation of test db
    ### may have to `rm /tmp/snovault/pgdata/postmaster.pid`

    postgres = postgresql_fixture.server_process(pgdata, echo=True)
    elasticsearch = elasticsearch_fixture.server_process(esdata, echo=True)
    nginx = nginx_server_process(echo=True)
    processes = [postgres, elasticsearch, nginx]


    @atexit.register
    def cleanup_process():
        for process in processes:
            if process.poll() is None:
                process.terminate()
        for process in processes:
            try:
                for line in process.stdout:
                    sys.stdout.write(line.decode('utf-8'))
            except IOError:
                pass
            process.wait()


    app = get_app(args.config_uri, args.app_name)

    if args.load:
        from pyramid.path import DottedNameResolver
        load_test_data = app.registry.settings.get('snovault.load_test_data')
        load_test_data = DottedNameResolver().resolve(load_test_data)
        load_res = load_test_data(app, args.access_key)
        if load_res:  # None if successful
            raise(load_res)

    if args.init:
        create_mapping.run(app, check_first=False, purge_queue=True)

    print('Started. ^C to exit.')

    stdouts = [p.stdout for p in processes]

    # Ugly; should probably use threads instead
    while True:
        readable, writable, err = select.select(stdouts, [], stdouts, 5)
        for stdout in readable:
            for line in iter(stdout.readline, b''):
                sys.stdout.write(line.decode('utf-8'))
        if err:
            for stdout in err:
                for line in iter(stdout.readline, b''):
                    sys.stdout.write(line.decode('utf-8'))
            break
Example #19
def app(app_settings):
    from encoded.tests.test_indexing import _app
    from snovault.elasticsearch import create_mapping
    for app in _app(app_settings):
        create_mapping.run(app)
        yield app
Example #20
def teardown(app, dbapi_conn):
    from snovault.elasticsearch import create_mapping
    create_mapping.run(app)
    cursor = dbapi_conn.cursor()
    cursor.execute("""TRUNCATE resources, transactions CASCADE;""")
    cursor.close()
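
This teardown works on a raw DB-API connection (hence cursor() and TRUNCATE) rather than a SQLAlchemy session. One way such a connection could be obtained (an assumption; the fixture providing dbapi_conn is not shown here) is from the engine bound to the registry session:

from snovault import DBSESSION

def get_dbapi_conn(app):
    # raw_connection() returns a DB-API connection exposing cursor()/commit()
    engine = app.registry[DBSESSION].bind
    return engine.raw_connection()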
Example #21
def test_queue_indexing_with_linked(app, testapp, indexer_testapp, dummy_request):
    """
    Test a whole bunch of things here:
    - posting/patching invalidates rev linked items
    - check linked_uuids/rev_link_names/rev_linked_to_me fields in ES
    - test indexer_utils.find_uuids_for_indexing fxn
    - test check_es_and_cache_linked_sids & validate_es_content
    - test purge functionality before and after removing links to an item
    """
    import webtest
    from snovault import util
    from pyramid.traversal import traverse
    from snovault.tests.testing_views import TestingLinkSourceSno
    es = app.registry[ELASTIC_SEARCH]
    indexer_queue = app.registry[INDEXER_QUEUE]
    # first, run create mapping with the indices we will use
    create_mapping.run(
        app,
        collections=['testing_link_target_sno', 'testing_link_source_sno'],
        skip_indexing=True
    )
    ppp_res = testapp.post_json(TEST_COLL, {'required': ''})
    ppp_uuid = ppp_res.json['@graph'][0]['uuid']
    target  = {'name': 'one', 'uuid': '775795d3-4410-4114-836b-8eeecf1d0c2f'}
    source = {
        'name': 'A',
        'target': '775795d3-4410-4114-836b-8eeecf1d0c2f',
        'ppp': ppp_uuid,
        'uuid': '16157204-8c8f-4672-a1a4-14f4b8021fcd',
        'status': 'current',
    }
    target_res = testapp.post_json('/testing-link-targets-sno/', target, status=201)
    res = indexer_testapp.post_json('/index', {'record': True})
    time.sleep(2)
    # wait for the first item to index
    doc_count_target = es.count(index='testing_link_target_sno', doc_type='testing_link_target_sno').get('count')
    doc_count_ppp = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
    tries = 0
    while (doc_count_target < 1 or doc_count_ppp < 1) and tries < 5:
        time.sleep(4)
        doc_count_target = es.count(index='testing_link_target_sno', doc_type='testing_link_target_sno').get('count')
        doc_count_ppp = es.count(index=TEST_TYPE, doc_type=TEST_TYPE).get('count')
        tries += 1
    assert doc_count_target == 1
    assert doc_count_ppp == 1
    # indexing the source will also reindex the target, since it has a reverse link
    source_res = testapp.post_json('/testing-link-sources-sno/', source, status=201)
    source_uuid = source_res.json['@graph'][0]['uuid']
    time.sleep(2)
    res = indexer_testapp.post_json('/index', {'record': True})
    assert res.json['indexing_count'] == 2
    time.sleep(2)
    # wait for them to index
    doc_count = es.count(index='testing_link_source_sno', doc_type='testing_link_source_sno').get('count')
    tries = 0
    while doc_count < 1 and tries < 5:
        time.sleep(4)
        doc_count = es.count(index='testing_link_source_sno', doc_type='testing_link_source_sno').get('count')
    assert doc_count == 1
    # patching json will not queue the embedded ppp
    # the target will be indexed though, since it has a linkTo back to the source
    testapp.patch_json('/testing-link-sources-sno/' + source_uuid, {'name': 'ABC'})
    time.sleep(2)
    res = indexer_testapp.post_json('/index', {'record': True})
    assert res.json['indexing_count'] == 2

    time.sleep(3)
    # check some stuff on the es results for source and target
    es_source = es.get(index='testing_link_source_sno', doc_type='testing_link_source_sno', id=source['uuid'])
    uuids_linked_emb = [link['uuid'] for link in es_source['_source']['linked_uuids_embedded']]
    uuids_linked_obj = [link['uuid'] for link in es_source['_source']['linked_uuids_object']]
    assert set(uuids_linked_emb) == {target['uuid'], source['uuid'], ppp_uuid}
    assert uuids_linked_obj == [source['uuid']]
    assert es_source['_source']['rev_link_names'] == {}
    assert es_source['_source']['rev_linked_to_me'] == [target['uuid']]

    es_target = es.get(index='testing_link_target_sno', doc_type='testing_link_target_sno', id=target['uuid'])
    # just the target uuid itself in the linked uuids for the object view
    uuids_linked_emb2 = [link['uuid'] for link in es_target['_source']['linked_uuids_embedded']]
    uuids_linked_obj2 = [link['uuid'] for link in es_target['_source']['linked_uuids_object']]
    assert set(uuids_linked_emb2) == {target['uuid'], source['uuid']}
    assert uuids_linked_obj2 == [target['uuid']]
    assert es_target['_source']['rev_link_names'] == {'reverse': [source['uuid']]}
    assert es_target['_source']['rev_linked_to_me'] == []

    # test find_uuids_for_indexing
    to_index = indexer_utils.find_uuids_for_indexing(app.registry, {target['uuid']})
    assert to_index == {target['uuid'], source['uuid']}
    to_index = indexer_utils.find_uuids_for_indexing(app.registry, {ppp_uuid})
    assert to_index == {ppp_uuid, source['uuid']}
    # this will return the target uuid, since it has an indexed rev link
    to_index = indexer_utils.find_uuids_for_indexing(app.registry, {source['uuid']})
    assert to_index == {target['uuid'], source['uuid']}
    # now use a made-up uuid; only result should be itself
    fake_uuid = str(uuid.uuid4())
    to_index = indexer_utils.find_uuids_for_indexing(app.registry, {fake_uuid})
    assert to_index == {fake_uuid}

    # test @@links functionality
    source_links_res = testapp.get('/' + source['uuid'] + '/@@links', status=200)
    linking_uuids = source_links_res.json.get('uuids_linking_to')
    assert linking_uuids and len(linking_uuids) == 1
    assert linking_uuids[0]['uuid'] == target['uuid']  # rev_link from target

    # test check_es_and_cache_linked_sids and validate_es_content
    # must get the context object through request traversal
    dummy_request.datastore = 'database'
    assert dummy_request._sid_cache == {}
    source_ctxt = traverse(dummy_request.root, source_res.json['@graph'][0]['@id'])['context']
    target_ctxt = traverse(dummy_request.root, target_res.json['@graph'][0]['@id'])['context']
    # first check frame=object for target
    tar_es_res_obj = util.check_es_and_cache_linked_sids(target_ctxt, dummy_request, 'object')
    assert tar_es_res_obj['uuid'] == target['uuid']
    assert set(uuids_linked_obj2) == set(dummy_request._sid_cache)
    # frame=embedded for source
    src_es_res_emb = util.check_es_and_cache_linked_sids(source_ctxt, dummy_request, 'embedded')
    assert src_es_res_emb['uuid'] == source['uuid']
    assert set(uuids_linked_emb) == set(dummy_request._sid_cache)
    # make sure everything in _sid_cache is present and up to date
    for rid in dummy_request._sid_cache:
        found_sid = dummy_request.registry[STORAGE].write.get_by_uuid(rid).sid
        assert dummy_request._sid_cache.get(rid) == found_sid
    # test validate_es_content with the correct sids and then an incorrect one
    valid = util.validate_es_content(source_ctxt, dummy_request, src_es_res_emb, 'embedded')
    assert valid is True

    # lastly, test purge_uuid and delete functionality
    with pytest.raises(webtest.AppError) as excinfo:
        del_res0 = testapp.delete_json('/' + source['uuid'] + '/?purge=True')
    assert 'Item status must equal deleted before purging' in str(excinfo.value)
    del_res1 = testapp.delete_json('/' + source['uuid'])
    assert del_res1.json['status'] == 'success'
    # this item will still have items linking to it until indexing occurs
    with pytest.raises(webtest.AppError) as excinfo:
        del_res2 = testapp.delete_json('/' + source['uuid'] + '/?purge=True')
    assert 'Cannot purge item as other items still link to it' in str(excinfo.value)
    # the source should fail due to outdated sids
    # must manually update _sid_cache on dummy_request for source
    src_sid = dummy_request.registry[STORAGE].write.get_by_uuid(source['uuid']).sid
    dummy_request._sid_cache[source['uuid']] = src_sid
    valid2 = util.validate_es_content(source_ctxt, dummy_request, src_es_res_emb, 'embedded')
    assert valid2 is False
    # the target should fail due to outdated rev_links (at least frame=object)
    # need to get the target context again, otherwise we get a sqlalchemy error
    target_ctxt2 = traverse(dummy_request.root, target_res.json['@graph'][0]['@id'])['context']
    valid3 = util.validate_es_content(target_ctxt2, dummy_request, tar_es_res_obj, 'object')
    assert valid3 is False
    res = indexer_testapp.post_json('/index', {'record': True})
    del_res3 = testapp.delete_json('/' + source['uuid'] + '/?purge=True')
    assert del_res3.json['status'] == 'success'
    assert del_res3.json['notification'] == 'Permanently deleted ' + source['uuid']
    time.sleep(3)
    # make sure everything has updated on ES
    check_es_source = es.get(index='testing_link_source_sno', doc_type='testing_link_source_sno',
                             id=source['uuid'], ignore=[404])
    assert check_es_source['found'] == False
    # source uuid should be removed from the target's linked uuids
    check_es_target = es.get(index='testing_link_target_sno', doc_type='testing_link_target_sno',
                             id=target['uuid'])
    uuids_linked_emb2 = [link['uuid'] for link in check_es_target['_source']['linked_uuids_embedded']]
    assert source['uuid'] not in uuids_linked_emb2
    # the source is now purged
    testapp.get('/' + source['uuid'], status=404)
    # make sure check_es_and_cache_linked_sids fails for the purged item
    es_res_emb2 = util.check_es_and_cache_linked_sids(source_ctxt, dummy_request, 'embedded')
    assert es_res_emb2 is None
Example #22
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description="Run development servers",
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--app-name', help="Pyramid app name in configfile")
    parser.add_argument('config_uri', help="path to configfile")
    parser.add_argument('--clear',
                        action="store_true",
                        help="Clear existing data")
    parser.add_argument('--init', action="store_true", help="Init database")
    parser.add_argument('--load', action="store_true", help="Load test set")
    parser.add_argument('--datadir',
                        default='/tmp/snovault',
                        help="path to datadir")
    args = parser.parse_args()

    logging.basicConfig()
    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('snovault').setLevel(logging.INFO)

    from snovault.tests import elasticsearch_fixture, postgresql_fixture
    from snovault.elasticsearch import create_mapping
    datadir = os.path.abspath(args.datadir)
    pgdata = os.path.join(datadir, 'pgdata')
    esdata = os.path.join(datadir, 'esdata')
    if args.clear:
        for dirname in [pgdata, esdata]:
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
    if args.init:
        postgresql_fixture.initdb(pgdata, echo=True)

    postgres = postgresql_fixture.server_process(pgdata, echo=True)
    elasticsearch = elasticsearch_fixture.server_process(esdata, echo=True)
    nginx = nginx_server_process(echo=True)
    processes = [postgres, elasticsearch, nginx]

    print_processes = []

    @atexit.register
    def cleanup_process():
        for process in processes:
            if process.poll() is None:
                process.terminate()
        for process in processes:
            try:
                for line in process.stdout:
                    sys.stdout.write(line.decode('utf-8'))
            except IOError:
                pass
            process.wait()
        for p in print_processes:
            p.terminate()

    if args.init:
        app = get_app(args.config_uri, args.app_name)
        create_mapping.run(app)

    if args.load:
        from pyramid.path import DottedNameResolver
        load_test_data = app.registry.settings.get('snovault.load_test_data')
        load_test_data = DottedNameResolver().resolve(load_test_data)
        load_test_data(app)

    print('Started. ^C to exit.')

    stdouts = [p.stdout for p in processes]

    def print_to_terminal(stdout):
        while True:
            for line in iter(stdout.readline, b''):
                sys.stdout.write(line.decode('utf-8'))

    readable, writable, err = select.select(stdouts, [], stdouts, 5)
    for stdout in readable:
        print_processes.append(
            Process(target=print_to_terminal, args=(stdout, )))
    for stdout in err:
        print_processes.append(
            Process(target=print_to_terminal, args=(stdout, )))
    for p in print_processes:
        p.start()
Example #23
def app(app_settings):
    from .. import test_indexing
    from snovault.elasticsearch import create_mapping
    for app in test_indexing.app(app_settings):
        create_mapping.run(app)
        yield app
Example #24
def teardown(app, dbapi_conn):
    from snovault.elasticsearch import create_mapping
    create_mapping.run(app)
    cursor = dbapi_conn.cursor()
    cursor.execute("""TRUNCATE resources, transactions CASCADE;""")
    cursor.close()
Example #25
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description="Run development servers", epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--app-name', help="Pyramid app name in configfile")
    parser.add_argument('config_uri', help="path to configfile")
    parser.add_argument('--clear', action="store_true", help="Clear existing data")
    parser.add_argument('--init', action="store_true", help="Init database")
    parser.add_argument('--load', action="store_true", help="Load test set")
    parser.add_argument('--datadir', default='/tmp/snovault', help="path to datadir")
    args = parser.parse_args()

    logging.basicConfig()
    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('snovault').setLevel(logging.INFO)

    from snovault.tests import elasticsearch_fixture, postgresql_fixture
    from snovault.elasticsearch import create_mapping
    datadir = os.path.abspath(args.datadir)
    pgdata = os.path.join(datadir, 'pgdata')
    esdata = os.path.join(datadir, 'esdata')
    if args.clear:
        for dirname in [pgdata, esdata]:
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
    if args.init:
        postgresql_fixture.initdb(pgdata, echo=True)

    postgres = postgresql_fixture.server_process(pgdata, echo=True)
    elasticsearch = elasticsearch_fixture.server_process(esdata, echo=True)
    nginx = nginx_server_process(echo=True)
    processes = [postgres, elasticsearch, nginx]

    print_processes = []

    @atexit.register
    def cleanup_process():
        for process in processes:
            if process.poll() is None:
                process.terminate()
        for process in processes:
            try:
                for line in process.stdout:
                    sys.stdout.write(line.decode('utf-8'))
            except IOError:
                pass
            process.wait()
        for p in print_processes:
            p.terminate()

    if args.init:
        app = get_app(args.config_uri, args.app_name)
        create_mapping.run(app)

    if args.load:
        from pyramid.path import DottedNameResolver
        load_test_data = app.registry.settings.get('snovault.load_test_data')
        load_test_data = DottedNameResolver().resolve(load_test_data)
        load_test_data(app)

    print('Started. ^C to exit.')

    stdouts = [p.stdout for p in processes]

    def print_to_terminal(stdout):
        while True:
            for line in iter(stdout.readline, b''):
                sys.stdout.write(line.decode('utf-8'))


    readable, writable, err = select.select(stdouts, [], stdouts, 5)
    for stdout in readable:
        print_processes.append(Process(target=print_to_terminal, args=(stdout,)))
    for stdout in err:
        print_processes.append(Process(target=print_to_terminal, args=(stdout,)))
    for p in print_processes:
        p.start()