Example #1
def test_failing_receiver(base_app, caplog):
    """Test failing signal receiver function."""
    try:
        _signals = Namespace()
        my_signal = _signals.signal('my-signal')

        def failing_event_builder(event, sender_app):
            raise Exception('builder-exception')

        # NOTE: event_0 is already registered by the mocked event
        # entrypoints; its configuration is extended here.
        base_app.config['STATS_EVENTS']['event_0'].update({
            'signal': my_signal,
            'event_builders': [failing_event_builder]
        })

        InvenioStats(base_app)
        current_queues.declare()

        with caplog.at_level(logging.ERROR):
            my_signal.send(base_app)

        error_logs = [r for r in caplog.records if r.levelno == logging.ERROR]
        assert len(error_logs) == 1
        assert error_logs[0].msg == 'Error building event'
        assert error_logs[0].exc_info[1].args[0] == 'builder-exception'

        # Check that no event was sent to the queue
        assert get_queue_size('stats-event_0') == 0
    finally:
        current_queues.delete()
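
The get_queue_size() helper used throughout these tests is not reproduced on this page. Below is a minimal sketch consistent with how the tests call it, assuming invenio-queues exposes the underlying bound kombu queue as queue.queue (that attribute, and the use of a passive declare, are assumptions, not confirmed API):

def get_queue_size(queue_name):
    """Return the number of messages waiting in the named queue (sketch)."""
    queue = current_queues.queues[queue_name]
    # ASSUMPTION: a passive declare asks the broker for the queue's state
    # without modifying it; kombu returns (queue, message_count,
    # consumer_count) for a bound queue.
    return queue.queue.queue_declare(passive=True).message_count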
Example #2
def test_failing_receiver(base_app, event_entrypoints, caplog):
    """Test failing signal receiver function."""
    try:
        _signals = Namespace()
        my_signal = _signals.signal('my-signal')

        def failing_event_builder(event, sender_app):
            raise Exception('builder-exception')

        base_app.config.update(dict(STATS_EVENTS=dict(event_0=dict(
            signal=my_signal,
            event_builders=[failing_event_builder]
        ))))
        InvenioStats(base_app)
        current_queues.declare()

        with caplog.at_level(logging.ERROR):
            my_signal.send(base_app)

        error_logs = [r for r in caplog.records if r.levelno == logging.ERROR]
        assert len(error_logs) == 1
        assert error_logs[0].msg == 'Error building event'
        assert error_logs[0].exc_info[1].args[0] == 'builder-exception'

        # Check that no event was sent to the queue
        assert get_queue_size('stats-event_0') == 0
    finally:
        current_queues.delete()
Example #3
def test_register_receivers(base_app, event_entrypoints):
    """Test signal-receiving/event-emitting functions registration."""
    try:
        _signals = Namespace()
        my_signal = _signals.signal('my-signal')

        def event_builder1(event, sender_app, signal_param, *args, **kwargs):
            event.update(dict(event_param1=signal_param))
            return event

        def event_builder2(event, sender_app, signal_param, *args, **kwargs):
            event.update(dict(event_param2=event['event_param1'] + 1))
            return event

        base_app.config.update(dict(
            STATS_EVENTS=dict(
                event_0=dict(
                    signal=my_signal,
                    event_builders=[event_builder1, event_builder2]
                )
            )
        ))
        InvenioStats(base_app)
        current_queues.declare()
        my_signal.send(base_app, signal_param=42)
        my_signal.send(base_app, signal_param=42)
        events = [event for event in current_stats.consume('event_0')]
        # Two events should have been created from the two sent signals,
        # each processed by both event builders.
        assert events == [{'event_param1': 42, 'event_param2': 43}] * 2
    finally:
        current_queues.delete()
Example #4
def clean_app(request, base_app):
    """Application with database and elasticsearch cleaned."""
    with base_app.app_context():
        try:
            db.session.remove()
            drop_database(db.engine.url)
        except ProgrammingError:
            pass
        create_database(db.engine.url)
        # reset elasticsearch
        for deleted in current_search.delete(ignore=[404]):
            pass
        # reset queues
        current_queues.delete()
        current_queues.declare()

    yield base_app

    # Teardown: in a generator fixture the code after ``yield`` runs when
    # the fixture is finalized, so the cleanup can live here directly.
    # (Registering a finalizer via ``request.addfinalizer`` after the
    # yield would be too late, and a ``return`` there is dead code.)
    with base_app.app_context():
        db.session.remove()
        drop_database(db.engine.url)
        # Dispose the engine in order to close all connections. This is
        # needed for sqlite in-memory databases.
        db.engine.dispose()
        current_queues.delete()
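
For reference, the names used by this fixture come from well-known packages; the conftest presumably imports something like the following (the exact import list is an assumption):

from invenio_db import db
from invenio_queues.proxies import current_queues
from invenio_search import current_search
from sqlalchemy.exc import ProgrammingError
from sqlalchemy_utils.functions import create_database, drop_database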
Example #5
def event_queues(app):
    """Delete and declare test queues."""
    current_queues.delete()
    try:
        current_queues.declare()
        yield current_queues.queues
    finally:
        current_queues.delete()
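
A test can depend on this fixture to run against freshly declared queues. A minimal usage sketch (the test itself is hypothetical):

def test_queues_are_declared(event_queues):
    """All registered queues exist once the fixture has run."""
    for queue in event_queues.values():
        assert queue.exists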
Example #6
def event_queues(app, event_entrypoints):
    """Delete and declare test queues."""
    current_queues.delete()
    try:
        current_queues.declare()
        yield
    finally:
        current_queues.delete()
Example #7
def generate_events(app, file_number=5, event_number=100, robot_event_number=0,
                    start_date=datetime.date(2017, 1, 1),
                    end_date=datetime.date(2017, 1, 7)):
    """Queued events for processing tests."""
    current_queues.declare()

    for t in current_search.put_templates(ignore=[400]):
        pass

    def _unique_ts_gen():
        ts = 0
        while True:
            ts += 1
            yield ts

    def generator_list():
        unique_ts = _unique_ts_gen()
        for file_idx in range(file_number):
            for entry_date in date_range(start_date, end_date):
                file_id = 'F000000000000000000000000000000{}'.format(
                    file_idx + 1)
                bucket_id = 'B000000000000000000000000000000{}'.format(
                    file_idx + 1)

                def build_event(is_robot=False):
                    ts = next(unique_ts)
                    timestamp = datetime.datetime.combine(
                        entry_date,
                        datetime.time(minute=ts % 60, second=ts % 60)
                    ).isoformat()
                    return dict(
                        timestamp=timestamp,
                        bucket_id=bucket_id,
                        file_id=file_id,
                        file_key='test.pdf',
                        size=9000,
                        visitor_id=100,
                        is_robot=is_robot
                    )

                for event_idx in range(event_number):
                    yield build_event()
                for event_idx in range(robot_event_number):
                    yield build_event(True)

    mock_queue = Mock()
    mock_queue.consume.return_value = generator_list()
    mock_queue.routing_key = 'stats-file-download'

    EventsIndexer(
        mock_queue,
        preprocessors=[
            build_file_unique_id
        ],
        double_click_window=0
    ).run()
    current_search_client.indices.refresh(index='*')
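
The date_range() helper used above is not shown on this page. A sketch consistent with the call sites, assuming the end date is inclusive:

def date_range(start_date, end_date):
    """Yield each date from start_date through end_date (sketch)."""
    for offset in range((end_date - start_date).days + 1):
        yield start_date + datetime.timedelta(days=offset)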
Example #8
def test_queue_exists(app, test_queues_entrypoints, config):
    """Test the "declare" CLI."""
    app.config.update(config)
    with app.app_context():
        for queue in current_queues.queues.values():
            assert not queue.exists
        current_queues.declare()
        for queue in current_queues.queues.values():
            assert queue.exists
Example #9
def test_event_queues_declare(app, event_entrypoints):
    """Test that event queues are declared properly."""
    try:
        for event in current_stats.events.values():
            assert not event.queue.exists
        current_queues.declare()
        for event in current_stats.events.values():
            assert event.queue.exists
    finally:
        current_queues.delete()
Example #10
def test_publish_and_consume_events(app, event_entrypoints):
    """Test that events are published and consumed properly."""
    try:
        event_type = 'file-download'
        events = [{"payload": "test {}".format(idx)} for idx in range(3)]
        current_queues.declare()
        current_stats.publish(event_type, events)
        assert list(current_stats.consume(event_type)) == events
    finally:
        current_queues.delete()
Example #11
def test_double_clicks(app, mock_event_queue, es):
    """Test that events occurring within a time window are counted as 1."""
    event_type = 'file-download'
    events = [
        _create_file_download_event(date)
        for date in [
            (2000, 6, 1, 10, 0, 10),
            (2000, 6, 1, 10, 0, 11),
            (2000, 6, 1, 10, 0, 19),
            (2000, 6, 1, 10, 0, 22),
        ]
    ]
    current_queues.declare()
    current_stats.publish(event_type, events)
    process_events(['file-download'])
    es.indices.refresh(index='*')
    res = es.search(index='events-stats-file-download-2000-06-01')
    assert res['hits']['total'] == 2
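
_create_file_download_event() is another test-suite helper that is not reproduced here. A minimal sketch that accepts the date tuples seen above and emits only fields appearing elsewhere on this page (the constant values are assumptions):

def _create_file_download_event(timestamp):
    """Build a file-download event for a (year, month, day[, h, m, s]) tuple."""
    return dict(
        timestamp=datetime.datetime(*timestamp).isoformat(),
        bucket_id='B0000000000000000000000000000001',
        file_id='F0000000000000000000000000000001',
        file_key='test.pdf',
        size=9000,
        visitor_id=100,
        is_robot=False,
    )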
Example #12
def test_failing_processors(app, es, event_queues, caplog):
    """Test events that raise an exception when processed."""
    search = Search(using=es)

    current_queues.declare()
    current_stats.publish('file-download', [
        _create_file_download_event(date)
        for date in [(2018, 1, 1), (2018, 1, 2), (2018, 1, 3), (2018, 1, 4)]
    ])

    def _raises_on_second_call(doc):
        if _raises_on_second_call.calls == 1:
            _raises_on_second_call.calls += 1
            raise Exception('mocked-exception')
        _raises_on_second_call.calls += 1
        return doc

    _raises_on_second_call.calls = 0

    queue = current_queues.queues['stats-file-download']
    indexer = EventsIndexer(queue, preprocessors=[_raises_on_second_call])

    current_search.flush_and_refresh(index='*')
    assert get_queue_size('stats-file-download') == 4
    assert not es.indices.exists('events-stats-file-download-2018-01-01')
    assert not es.indices.exists('events-stats-file-download-2018-01-02')
    assert not es.indices.exists('events-stats-file-download-2018-01-03')
    assert not es.indices.exists('events-stats-file-download-2018-01-04')
    assert not es.indices.exists_alias(name='events-stats-file-download')

    with caplog.at_level(logging.ERROR):
        indexer.run()  # 2nd event raises exception and is dropped

    # Check that the error was logged
    error_logs = [r for r in caplog.records if r.levelno == logging.ERROR]
    assert len(error_logs) == 1
    assert error_logs[0].msg == 'Error while processing event'
    assert error_logs[0].exc_info[1].args[0] == 'mocked-exception'

    current_search.flush_and_refresh(index='*')
    assert get_queue_size('stats-file-download') == 0
    assert search.index('events-stats-file-download').count() == 3
    assert search.index('events-stats-file-download-2018-01-01').count() == 1
    assert not es.indices.exists('events-stats-file-download-2018-01-02')
    assert search.index('events-stats-file-download-2018-01-03').count() == 1
    assert search.index('events-stats-file-download-2018-01-04').count() == 1
Example #13
def generate_events(app, file_number=5, event_number=100, robot_event_number=0,
                    start_date=datetime.date(2017, 1, 1),
                    end_date=datetime.date(2017, 1, 7)):
    """Queued events for processing tests."""
    current_queues.declare()

    for t in current_search.put_templates(ignore=[400]):
        pass

    def generator_list():
        for file_idx in range(file_number):
            for entry_date in date_range(start_date, end_date):
                entry_date = datetime.datetime.combine(
                    entry_date, datetime.time())
                file_id = '{0}-{1}'.format(entry_date.strftime('%Y-%m-%d'),
                                           file_idx)

                def build_event(is_robot=False):
                    return dict(
                        timestamp=entry_date.isoformat(),
                        bucket_id=file_id,
                        file_id=file_id,
                        file_key='test.pdf',
                        visitor_id=100,
                        is_robot=is_robot
                    )

                for event_idx in range(event_number):
                    yield build_event()
                for event_idx in range(robot_event_number):
                    yield build_event(True)

    mock_queue = Mock()
    mock_queue.consume.return_value = generator_list()
    mock_queue.routing_key = 'stats-file-download'

    EventsIndexer(
        mock_queue,
        preprocessors=[
            build_file_unique_id
        ]
    ).run()
    current_search_client.indices.flush(index='*')
Example #14
def queues_declare(alembic, verbose):
    """Declare queues, except for the indexing queue."""
    current_queues.declare()
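
Since declaring queues is a single call, it is easy to expose on the command line. A sketch of such wiring with click and Flask (the command name and output are assumptions, not the actual Invenio CLI):

import click
from flask.cli import with_appcontext
from invenio_queues.proxies import current_queues

@click.command('queues-declare')
@with_appcontext
def queues_declare_command():
    """Declare all registered message queues (sketch)."""
    current_queues.declare()
    click.secho('Queues declared.', fg='green')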
Example #15
def test_overwriting_aggregations(app, es, event_queues, sequential_ids):
    """Check that the StatAggregator correctly starts from the bookmark.

    1. Create a sample file-download event and process it.
    2. Run the aggregator and write the count to the aggregation index.
    3. Create new events and repeat the procedure, asserting that results
       within the interval of the previous events overwrite the existing
       aggregation (checked via the increased document version).
    """
    for t in current_search.put_templates(ignore=[400]):
        pass

    class NewDate(datetime.datetime):
        """datetime.datetime mock."""
        # Aggregate at 12:00, thus the day will be aggregated again later
        current_date = (2017, 6, 2, 12)

        @classmethod
        def utcnow(cls):
            return cls(*cls.current_date)

    # Send some events
    event_type = 'file-download'
    events = [
        _create_file_download_event(date)
        for date in [(2017, 6, 1), (2017, 6, 2, 10)]
    ]
    current_queues.declare()
    current_stats.publish(event_type, events)
    process_events(['file-download'])
    current_search_client.indices.flush(index='*')
    with patch('datetime.datetime', NewDate):
        aggregate_events(['file-download-agg'])

    # Check that the documents indexed so far are all at version 1.
    res = current_search_client.search(index='stats-file-download',
                                       version=True)
    for hit in res['hits']['hits']:
        if 'file_id' in hit['_source'].keys():
            assert hit['_version'] == 1

    # Send new events, some on the last aggregated day and some far
    # in the future.
    new_events = [
        _create_file_download_event(date) for date in [
            (2017, 6, 2, 15),  # second event on the same date
            (2017, 7, 1)
        ]
    ]
    current_stats.publish(event_type, new_events)
    process_events(['file-download'])
    current_search_client.indices.flush(index='*')

    # Aggregate again. The aggregation should start from the last bookmark.
    NewDate.current_date = (2017, 7, 2)
    with patch('datetime.datetime', NewDate):
        aggregate_events(['file-download-agg'])
    current_search_client.indices.flush(index='*')

    res = current_search_client.search(
        index='stats-file-download',
        doc_type='file-download-day-aggregation',
        version=True)
    for hit in res['hits']['hits']:
        if hit['_source']['timestamp'] == '2017-06-02T00:00:00':
            assert hit['_version'] == 2
            assert hit['_source']['count'] == 2
        else:
            assert hit['_version'] == 1