Exemplo n.º 1
0
def test_record_index_after_update(app, test_users, test_records, script_info,
                                   login_user):
    """Check that updating a record also reindexes it.

    The first test record is given a new title, the bulk indexing queue is
    processed and the indices are flushed. The record search endpoint is
    then queried and the hit matching the updated record's PID must carry
    the new title.
    """
    new_title = 'my modified title'

    with app.app_context():
        rec = Record.get_record(test_records[0].record_id)
        pid = test_records[0].pid
        rec.update({'title': new_title})
        # execute scheduled tasks synchronously
        process_bulk_queue.delay()
        # flush the indices so that indexed records are searchable
        current_search_client.indices.flush('*')
        search_url = url_for('b2share_records_rest.b2rec_list')

    headers = [('Content-Type', 'application/json'),
               ('Accept', 'application/json')]

    with app.test_client() as client:
        record_search_res = client.get(
            search_url,
            data='',
            headers=headers)
        assert record_search_res.status_code == 200
        record_search_data = json.loads(
            record_search_res.get_data(as_text=True))
        assert record_search_data['hits']['total'] == len(test_records)

        matching_hits = [hit for hit in record_search_data['hits']['hits']
                         if hit['id'] == pid]
        assert matching_hits, 'updated record is missing from search results'
        found_rec = matching_hits[0]
        # The title must be read from the search hit. Asserting on ``rec``
        # would only re-check the in-memory Record modified above, because
        # comprehension variables do not leak into the enclosing scope.
        # NOTE(review): presumably the serializer nests the record JSON
        # under 'metadata'; fall back to the hit itself if it does not.
        indexed_record = found_rec.get('metadata', found_rec)
        assert indexed_record['title'] == new_title
Exemplo n.º 2
0
def test_record_index_after_update(app, test_users, test_records, script_info,
                                   login_user):
    """Check that updating a record also reindexes it.

    The first test record is given a new title, the bulk indexing queue is
    processed and the indices are flushed. The record search endpoint is
    then queried and the hit matching the updated record's PID must carry
    the new title.
    """
    new_title = 'my modified title'

    with app.app_context():
        rec = Record.get_record(test_records[0].record_id)
        pid = test_records[0].pid
        rec.update({'title': new_title})
        # execute scheduled tasks synchronously
        process_bulk_queue.delay()
        # flush the indices so that indexed records are searchable
        current_search_client.indices.flush('*')
        search_url = url_for('b2share_records_rest.b2rec_list')

    headers = [('Content-Type', 'application/json'),
               ('Accept', 'application/json')]

    with app.test_client() as client:
        record_search_res = client.get(
            search_url,
            data='',
            headers=headers)
        assert record_search_res.status_code == 200
        record_search_data = json.loads(
            record_search_res.get_data(as_text=True))
        assert record_search_data['hits']['total'] == len(test_records)

        matching_hits = [hit for hit in record_search_data['hits']['hits']
                         if hit['id'] == pid]
        assert matching_hits, 'updated record is missing from search results'
        found_rec = matching_hits[0]
        # The title must be read from the search hit. Asserting on ``rec``
        # would only re-check the in-memory Record modified above, because
        # comprehension variables do not leak into the enclosing scope.
        # NOTE(review): presumably the serializer nests the record JSON
        # under 'metadata'; fall back to the hit itself if it does not.
        indexed_record = found_rec.get('metadata', found_rec)
        assert indexed_record['title'] == new_title
Exemplo n.º 3
0
def elasticsearch_index_reindex(alembic, verbose):
    """Send all registered record and deposit UUIDs to the bulk indexer.

    Selects the ``object_uuid`` of every REGISTERED persistent identifier
    whose object type is ``'rec'`` and whose PID type is ``'b2rec'`` or
    ``'b2dep'``, hands them to ``RecordIndexer.bulk_index`` and then
    launches ``process_bulk_queue``.

    ``alembic`` and ``verbose`` are accepted but not used by this function.
    """
    registered_pids = PersistentIdentifier.query.filter_by(
        object_type='rec', status=PIDStatus.REGISTERED)
    wanted_pids = registered_pids.filter(
        PersistentIdentifier.pid_type.in_(['b2rec', 'b2dep']))
    uuid_rows = wanted_pids.values(PersistentIdentifier.object_uuid)
    # Only the first column of each row is needed; unwrap it lazily.
    object_uuids = (row[0] for row in uuid_rows)
    RecordIndexer().bulk_index(object_uuids)
    process_bulk_queue.delay()
Exemplo n.º 4
0
def elasticsearch_index_reindex(alembic, verbose):
    """Send all registered record and deposit UUIDs to the bulk indexer.

    Selects the ``object_uuid`` of every REGISTERED persistent identifier
    whose object type is ``'rec'`` and whose PID type is ``'b2rec'`` or
    ``'b2dep'``, hands them to ``RecordIndexer.bulk_index`` and then
    launches ``process_bulk_queue``.

    ``alembic`` and ``verbose`` are accepted but not used by this function.
    """
    pid_query = PersistentIdentifier.query
    pid_query = pid_query.filter_by(
        object_type='rec', status=PIDStatus.REGISTERED)
    pid_query = pid_query.filter(
        PersistentIdentifier.pid_type.in_(['b2rec', 'b2dep']))
    rows = pid_query.values(PersistentIdentifier.object_uuid)
    # Each row holds a single selected column; take it lazily.
    uuids = (entry[0] for entry in rows)
    RecordIndexer().bulk_index(uuids)
    process_bulk_queue.delay()
Exemplo n.º 5
0
def manage_indexer_queues():
    """Inspect every indexer queue and spawn bulk indexers where needed.

    Each registered indexer's queue is bound to a channel and declared to
    obtain its message and consumer counts. A ``process_bulk_queue`` task is
    dispatched for an indexer when its queue holds at least one message and
    has fewer consumers than ``INDEXER_MAX_BULK_CONSUMERS``.
    """
    amqp_channel = current_celery_app.connection().channel()
    registered_indexers = current_indexer_registry.all()

    for indexer_name, registered in registered_indexers.items():
        bound_queue = registered.mq_queue.bind(amqp_channel)
        # queue_declare() returns a 3-tuple; the first item is not needed.
        _, pending_messages, active_consumers = bound_queue.queue_declare()
        consumer_limit = current_app.config["INDEXER_MAX_BULK_CONSUMERS"]

        # Nothing to do for empty queues or queues already at the limit.
        if pending_messages <= 0 or active_consumers >= consumer_limit:
            continue
        process_bulk_queue.delay(indexer_name=indexer_name)
Exemplo n.º 6
0
def test_unpublished_deposit_unindex(app, test_users, draft_deposits,
                                     script_info, login_user):
    """Deleting an unpublished deposit must remove it from the search index.

    The first draft deposit is deleted; afterwards the search is expected to
    return no records and only the remaining draft deposits.
    """
    creator = test_users['deposits_creator']

    with app.app_context():
        doomed_deposit = Deposit.get_record(draft_deposits[0].deposit_id)
        doomed_deposit.delete()
        # run the bulk indexing task so the deletion reaches the index
        process_bulk_queue.delay()
        # flush so that the index changes are visible to searches
        current_search_client.indices.flush('*')

    # the deleted deposit must no longer show up in search results
    subtest_record_search(app, creator, [], draft_deposits[1:], login_user)
Exemplo n.º 7
0
def test_published_deposit_unindex(app, test_users, test_records, script_info,
                                   login_user):
    """Deleting a published deposit must remove it from the search index.

    The deposit of the first test record is deleted; afterwards the search is
    expected to return all records but only the remaining deposits.
    """
    creator = test_users['deposits_creator']

    with app.app_context():
        doomed_deposit = Deposit.get_record(test_records[0].deposit_id)
        doomed_deposit.delete()
        # run the bulk indexing task so the deletion reaches the index
        process_bulk_queue.delay()
        # flush so that the index changes are visible to searches
        current_search_client.indices.flush('*')

    # the deleted deposit must no longer show up in search results
    subtest_record_search(
        app, creator, test_records, test_records[1:], login_user)
Exemplo n.º 8
0
def test_record_indexing(app, test_users, test_records, script_info,
                         login_user):
    """Test record indexing and reindexing.

    Three phases are checked through ``subtest_record_search``:

    1. the fixture records and deposits are searchable,
    2. nothing is searchable once the indices were deleted and recreated,
    3. everything is searchable again after the ``reindex`` CLI command ran
       and the bulk queue was processed.
    """
    creator = test_users['deposits_creator']

    # Phase 1: the fixtures should already be indexed.
    with app.app_context():
        # flush so that indexed documents are visible to searches
        current_search_client.indices.flush('*')
    subtest_record_search(app, creator, test_records, test_records, login_user)

    # Phase 2: wipe the indices and recreate them empty.
    with app.app_context():
        current_search_client.indices.flush('*')
        # delete() and create() are iterated to completion; the yielded
        # values themselves are not needed
        for _ in current_search.delete(ignore=[404]):
            pass
        for _ in current_search.create(None):
            pass
        current_search_client.indices.flush('*')
    subtest_record_search(app, creator, [], [], login_user)

    # Phase 3: reindex through the CLI.
    with app.app_context():
        cli_runner = CliRunner()
        # initialize and purge the queue
        queue_result = cli_runner.invoke(
            cli.queue, ['init', 'purge'], obj=script_info)
        assert 0 == queue_result.exit_code
        # schedule a reindex task
        reindex_result = cli_runner.invoke(
            cli.reindex, ['--yes-i-know'], obj=script_info)
        assert 0 == reindex_result.exit_code
        # run the bulk indexing task
        process_bulk_queue.delay()
        current_search_client.indices.flush('*')
    subtest_record_search(app, creator, test_records, test_records, login_user)
Exemplo n.º 9
0
def test_record_indexing(app, test_users, test_records, script_info,
                         login_user):
    """Test record indexing and reindexing.

    Three phases are checked through ``subtest_record_search``:

    1. the fixture records and deposits are searchable,
    2. nothing is searchable once the indices were deleted and recreated,
    3. everything is searchable again after the ``reindex`` CLI command ran
       and the bulk queue was processed.
    """
    creator = test_users['deposits_creator']

    # Phase 1: the fixtures should already be indexed.
    with app.app_context():
        # flush so that indexed documents are visible to searches
        current_search_client.indices.flush('*')
    subtest_record_search(app, creator, test_records, test_records, login_user)

    # Phase 2: wipe the indices and recreate them empty.
    with app.app_context():
        current_search_client.indices.flush('*')
        # delete() and create() are iterated to completion; the yielded
        # values themselves are not needed
        for _ in current_search.delete(ignore=[404]):
            pass
        for _ in current_search.create(None):
            pass
        current_search_client.indices.flush('*')
    subtest_record_search(app, creator, [], [], login_user)

    # Phase 3: reindex through the CLI.
    with app.app_context():
        cli_runner = CliRunner()
        # initialize and purge the queue
        queue_result = cli_runner.invoke(
            cli.queue, ['init', 'purge'], obj=script_info)
        assert 0 == queue_result.exit_code
        # schedule a reindex task
        reindex_result = cli_runner.invoke(
            cli.reindex, ['--yes-i-know'], obj=script_info)
        assert 0 == reindex_result.exit_code
        # run the bulk indexing task
        process_bulk_queue.delay()
        current_search_client.indices.flush('*')
    subtest_record_search(app, creator, test_records, test_records, login_user)
Exemplo n.º 10
0
def test_delete_record(app, test_users, test_communities, login_user,
                       script_info):
    """Test record deletion through the REST API.

    A record with two files is created. The test then checks that:

    * the record, the deposit, their buckets and one file of each are
      reachable before the deletion,
    * anonymous and non-admin users cannot delete the record, the admin can,
    * after the deletion the record and deposit answer 410 and the buckets
      and files answer 404, for every kind of user,
    * running a reindex does not put the deleted record and deposit back
      into the ``records`` and ``deposits`` indices.
    """
    from click.testing import CliRunner
    from invenio_search import current_search_client
    from invenio_indexer import cli
    from invenio_indexer.tasks import process_bulk_queue
    uploaded_files = {
        'myfile1.dat': b'contents1',
        'myfile2.dat': b'contents2'
    }
    admin = test_users['admin']

    headers = [('Accept', 'application/json')]
    with app.app_context():
        creator = create_user('creator')
        non_creator = create_user('non_creator')

        record_data = generate_record_data()
        with app.test_client() as client:
            deposit, record_pid, record = create_record(
                record_data, creator, files=uploaded_files
            )
            pid_value = record_pid.pid_value
            bucket_id = record.files.bucket.id
            object_version = record.files.bucket.objects[0]
            deposit_bucket_id = deposit.files.bucket.id
            deposit_object_version = deposit.files.bucket.objects[0]

            record_url = url_for('b2share_records_rest.b2rec_item',
                                 pid_value=pid_value)
            deposit_url = url_for('b2share_deposit_rest.b2dep_item',
                                  pid_value=pid_value)
            bucket_url = url_for('invenio_files_rest.bucket_api',
                                 bucket_id=bucket_id)
            deposit_bucket_url = url_for('invenio_files_rest.bucket_api',
                                         bucket_id=deposit_bucket_id)
            object_version_url = url_for(
                'invenio_files_rest.object_api',
                bucket_id=bucket_id,
                version=object_version.version_id,
                key=object_version.key
            )
            deposit_object_version_url = url_for(
                'invenio_files_rest.object_api',
                bucket_id=deposit_bucket_id,
                version=deposit_object_version.version_id,
                key=deposit_object_version.key
            )
        # check that the record and deposit are searchable
        current_search_client.indices.flush('*')

        res = current_search_client.search(index='records')
        assert res['hits']['total'] == 1
        res = current_search_client.search(index='deposits')
        assert res['hits']['total'] == 1

    # (url, status code expected once the record has been deleted)
    access_checks = [
        (record_url, 410),
        (bucket_url, 404),
        (object_version_url, 404),
        (deposit_url, 410),
        (deposit_bucket_url, 404),
        (deposit_object_version_url, 404),
    ]

    def test_delete(status, user=None):
        """Send a DELETE for the record as ``user`` and expect ``status``."""
        with app.test_client() as client:
            if user is not None:
                login_user(user, client)
            # delete the record
            request_res = client.delete(record_url, headers=headers)
            assert request_res.status_code == status

    def test_access(user=None, deleted=True):
        """GET every record/deposit resource as ``user``.

        When ``deleted`` is false every resource must answer 200; otherwise
        each must answer its "deleted" status from ``access_checks``.
        """
        with app.test_client() as client:
            if user is not None:
                login_user(user, client)
            for url, deleted_status in access_checks:
                # Compute the expectation first: writing
                # ``assert code == 410 if deleted else 200`` would assert the
                # constant 200 (always true) whenever ``deleted`` is False.
                expected_status = deleted_status if deleted else 200
                request_res = client.get(url, headers=headers)
                assert request_res.status_code == expected_status, url

    # Check that everything is accessible
    test_access(creator, deleted=False)

    test_delete(401)  # anonymous user
    test_delete(403, creator)
    test_delete(403, non_creator)
    test_delete(200, admin)

    test_access()  # anonymous user
    test_access(creator)
    test_access(non_creator)
    test_access(admin)

    # Check that reindexing records does not index deleted records and deposits
    with app.app_context():
        runner = CliRunner()
        # Initialize queue
        res = runner.invoke(cli.queue, ['init', 'purge'],
                            obj=script_info)
        assert 0 == res.exit_code
        # schedule a reindex task
        res = runner.invoke(cli.reindex, ['--yes-i-know'],
                            obj=script_info)
        assert 0 == res.exit_code
        res = runner.invoke(cli.run, [], obj=script_info)
        assert 0 == res.exit_code
        # execute scheduled tasks synchronously
        process_bulk_queue.delay()
        # flush the indices so that indexed records are searchable
        current_search_client.indices.flush('*')

        # check that the record and deposit are not indexed
        res = current_search_client.search(index='records')
        assert res['hits']['total'] == 0
        res = current_search_client.search(index='deposits')
        assert res['hits']['total'] == 0
Exemplo n.º 11
0
def test_delete_record(app, test_users, test_communities, login_user,
                       script_info):
    """Test record deletion through the REST API.

    A record with two files is created. The test then checks that:

    * the record, the deposit, their buckets and one file of each are
      reachable before the deletion,
    * anonymous and non-admin users cannot delete the record, the admin can
      (answered with 204),
    * after the deletion the record and deposit answer 410 and the buckets
      and files answer 404, for every kind of user,
    * running a reindex does not put the deleted record and deposit back
      into the ``records`` and ``deposits`` indices.
    """
    from click.testing import CliRunner
    from invenio_search import current_search_client
    from invenio_indexer import cli
    from invenio_indexer.tasks import process_bulk_queue
    uploaded_files = {
        'myfile1.dat': b'contents1',
        'myfile2.dat': b'contents2'
    }
    admin = test_users['admin']

    headers = [('Accept', 'application/json')]
    with app.app_context():
        creator = create_user('creator')
        non_creator = create_user('non_creator')

        record_data = generate_record_data()
        with app.test_client() as client:
            deposit, record_pid, record = create_record(
                record_data, creator, files=uploaded_files
            )
            pid_value = record_pid.pid_value
            bucket_id = record.files.bucket.id
            object_version = record.files.bucket.objects[0]
            deposit_bucket_id = deposit.files.bucket.id
            deposit_object_version = deposit.files.bucket.objects[0]

            record_url = url_for('b2share_records_rest.b2rec_item',
                                 pid_value=pid_value)
            deposit_url = url_for('b2share_deposit_rest.b2dep_item',
                                  pid_value=pid_value)
            bucket_url = url_for('invenio_files_rest.bucket_api',
                                 bucket_id=bucket_id)
            deposit_bucket_url = url_for('invenio_files_rest.bucket_api',
                                         bucket_id=deposit_bucket_id)
            object_version_url = url_for(
                'invenio_files_rest.object_api',
                bucket_id=bucket_id,
                version=object_version.version_id,
                key=object_version.key
            )
            deposit_object_version_url = url_for(
                'invenio_files_rest.object_api',
                bucket_id=deposit_bucket_id,
                version=deposit_object_version.version_id,
                key=deposit_object_version.key
            )
        # check that the record and deposit are searchable
        current_search_client.indices.flush('*')

        res = current_search_client.search(index='records')
        assert res['hits']['total'] == 1
        res = current_search_client.search(index='deposits')
        assert res['hits']['total'] == 1

    # (url, status code expected once the record has been deleted)
    access_checks = [
        (record_url, 410),
        (bucket_url, 404),
        (object_version_url, 404),
        (deposit_url, 410),
        (deposit_bucket_url, 404),
        (deposit_object_version_url, 404),
    ]

    def test_delete(status, user=None):
        """Send a DELETE for the record as ``user`` and expect ``status``."""
        with app.test_client() as client:
            if user is not None:
                login_user(user, client)
            # delete the record
            request_res = client.delete(record_url, headers=headers)
            assert request_res.status_code == status

    def test_access(user=None, deleted=True):
        """GET every record/deposit resource as ``user``.

        When ``deleted`` is false every resource must answer 200; otherwise
        each must answer its "deleted" status from ``access_checks``.
        """
        with app.test_client() as client:
            if user is not None:
                login_user(user, client)
            for url, deleted_status in access_checks:
                # Compute the expectation first: writing
                # ``assert code == 410 if deleted else 200`` would assert the
                # constant 200 (always true) whenever ``deleted`` is False.
                expected_status = deleted_status if deleted else 200
                request_res = client.get(url, headers=headers)
                assert request_res.status_code == expected_status, url

    # Check that everything is accessible
    test_access(creator, deleted=False)
    test_delete(401)  # anonymous user
    test_delete(403, creator)
    test_delete(403, non_creator)
    test_delete(204, admin)

    test_access()  # anonymous user
    test_access(creator)
    test_access(non_creator)
    test_access(admin)

    # Check that reindexing records does not index deleted records and deposits
    with app.app_context():
        runner = CliRunner()
        # Initialize queue
        res = runner.invoke(cli.queue, ['init', 'purge'],
                            obj=script_info)
        assert 0 == res.exit_code
        # schedule a reindex task
        res = runner.invoke(cli.reindex, ['--yes-i-know'],
                            obj=script_info)
        assert 0 == res.exit_code
        res = runner.invoke(cli.run, [], obj=script_info)
        assert 0 == res.exit_code
        # execute scheduled tasks synchronously
        process_bulk_queue.delay()
        # flush the indices so that indexed records are searchable
        current_search_client.indices.flush('*')

        # check that the record and deposit are not indexed
        res = current_search_client.search(index='records')
        assert res['hits']['total'] == 0
        res = current_search_client.search(index='deposits')
        assert res['hits']['total'] == 0
Exemplo n.º 12
0
def test_record_indexing(app, test_users, test_records, script_info,
                         login_user):
    """Test record indexing.

    The indices are deleted and recreated, a reindex is scheduled through
    the CLI and the bulk queue is processed. The REST search endpoint must
    then list exactly the fixture records, and, for the logged-in creator
    with ``drafts=1``, exactly the fixture deposits.
    """
    creator = test_users['deposits_creator']

    def search_all_records():
        # match_all query against the "records" index; the result is not
        # inspected by this test
        return current_search_client.search(
            index="records", body={"query": {"match_all": {}}})

    with app.app_context():
        current_search_client.indices.flush('*')
        search_all_records()

    with app.app_context():
        # delete all elasticsearch indices and recreate them; both calls
        # are iterated to completion, the yielded values are not needed
        for _ in current_search.delete(ignore=[404]):
            pass
        for _ in current_search.create(None):
            pass

        cli_runner = CliRunner()
        # initialize and purge the queue
        queue_result = cli_runner.invoke(
            cli.queue, ['init', 'purge'], obj=script_info)
        assert 0 == queue_result.exit_code
        # schedule a reindex task
        reindex_result = cli_runner.invoke(
            cli.reindex, ['--yes-i-know'], obj=script_info)
        assert 0 == reindex_result.exit_code
        # run the bulk indexing task
        process_bulk_queue.delay()
        # flush so that indexed documents are visible to searches
        current_search_client.indices.flush('*')

        search_url = url_for('b2share_records_rest.b2rec_list')
        search_deposits_url = url_for('b2share_records_rest.b2rec_list',
                                      drafts=1)

    headers = [('Content-Type', 'application/json'),
               ('Accept', 'application/json')]

    with app.app_context():
        search_all_records()

    # search for published records
    with app.test_client() as client:
        response = client.get(search_url, data='', headers=headers)
        assert response.status_code == 200
        payload = json.loads(response.get_data(as_text=True))
        assert payload['hits']['total'] == len(test_records)
        found_record_pids = sorted(
            hit['id'] for hit in payload['hits']['hits'])
        wanted_record_pids = sorted(str(rec[1]) for rec in test_records)
        assert found_record_pids == wanted_record_pids

    # search for draft records
    with app.test_client() as client:
        login_user(creator, client)
        response = client.get(search_deposits_url, data='', headers=headers)
        assert response.status_code == 200
        payload = json.loads(response.get_data(as_text=True))
        assert payload['hits']['total'] == len(test_records)
        found_deposit_pids = sorted(
            hit['id'] for hit in payload['hits']['hits'])
        wanted_deposit_pids = sorted(rec[0].hex for rec in test_records)
        assert found_deposit_pids == wanted_deposit_pids
Exemplo n.º 13
0
def test_process_bulk_queue(app):
    """Check that the task calls ``RecordIndexer.process_bulk_queue``."""
    patched_target = 'invenio_indexer.api.RecordIndexer.process_bulk_queue'
    with patch(patched_target) as mocked_method:
        process_bulk_queue.delay()
        assert mocked_method.called
Exemplo n.º 14
0
def test_process_bulk_queue(app):
    """Check that the task calls ``RecordIndexer.process_bulk_queue``."""
    patched_target = 'invenio_indexer.api.RecordIndexer.process_bulk_queue'
    with patch(patched_target) as mocked_method:
        process_bulk_queue.delay()
        assert mocked_method.called