Example #1
0
def test_delete_keys(redis_db, redis_driver, config_helper, logger,
                     ingestion_manager):
    """Verify that RedisDB.delete_keys removes the keys created for the test job.

    Flow: purge Redis, seed test data, collect the job's keys, delete them,
    then assert no matching keys remain.
    """
    db: RedisDB = redis_db
    r: Redis = redis_driver
    commons.purge_redis_database(redis_db=redis_db, log=logger)
    commons.init_redis_test_data(im=ingestion_manager)
    # redis-py's keys() already returns a list; wrapping it in a
    # comprehension was redundant.
    keys_to_delete = list(r.keys(pattern=f'{config_helper.test_job_name}*'))
    db.delete_keys(keys=keys_to_delete)
    # NOTE(review): deletion targets the job-name pattern, but the check below
    # scans 'test_key*' — confirm these two patterns are meant to differ.
    # An empty result list is falsy, which is exactly what we assert.
    assert not r.keys(pattern='test_key*')
def test_process_job(config_helper, ingestion_manager, redis_db, logger, neo):
    """End-to-end: processing the seeded Redis content must create the
    expected number of nodes and edges in Neo4j."""

    def count_of(cypher: str) -> int:
        # Run a Cypher query and return its single COUNT(*) value.
        return neo.pull_query(query=cypher).value()[0]

    # Start from clean stores, then seed the Redis test data.
    commons.purge_redis_database(redis_db=redis_db, log=logger)
    commons.purge_neo4j_database(log=logger, neo=neo)
    commons.init_redis_test_data(im=ingestion_manager)

    ingestion_manager.process_redis_content(
        translation_id=config_helper.test_job_name,
        request_id='unit-testing')

    node_query = f'MATCH (:{config_helper.test_labels[0]}) RETURN COUNT(*) AS count'
    assert count_of(node_query) == config_helper.number_of_test_nodes

    edge_query = f'MATCH ()-[:{config_helper.test_edge_type}]->() RETURN COUNT(*) AS count'
    assert count_of(edge_query) == config_helper.number_of_test_edges
def test_publish_job(config_helper, redis_driver, ingestion_manager, nodes,
                     edges, logger, redis_db):
    """Publishing a nodes job and an edges job must create exactly one Redis
    set per operation, each holding every published item."""
    r: Redis = redis_driver
    im: IngestionManager = ingestion_manager

    commons.purge_redis_database(redis_db=redis_db, log=logger)

    # Hoist the values used repeatedly below.
    job = config_helper.test_job_name
    sep = config_helper.key_separator
    nodes_op = config_helper.nodes_ingestion_operation
    edges_op = config_helper.edges_ingestion_operation

    # Publish the nodes job.
    im.publish_job(job_name=job,
                   operation=nodes_op,
                   operation_arguments=','.join(config_helper.test_labels),
                   items=[str(value) for value in nodes])

    # Publish the edges job.
    im.publish_job(
        job_name=job,
        operation=edges_op,
        operation_arguments=
        f'{config_helper.test_edge_type},{config_helper.test_labels[0]}',
        items=[str(value) for value in edges])

    # One key per operation, two keys total for the job.
    assert len(r.keys(pattern=f'{job}*')) == 2

    node_keys = r.keys(pattern=f'{job}{sep}{nodes_op}{sep}*')
    assert len(node_keys) == 1
    edges_keys = r.keys(pattern=f'{job}{sep}{edges_op}{sep}*')
    assert len(edges_keys) == 1

    # Each Redis set must hold every published item (sets deduplicate, so
    # this presumes the fixtures yield unique values).
    assert r.scard(name=node_keys[0]) == len(nodes)
    assert r.scard(name=edges_keys[0]) == len(edges)
Example #4
0
def run_around_tests(redis_db, logger, ingestion_manager):
    """Per-test setup: reset Redis to a known state, then hand control to
    the test body."""
    # Wipe anything a previous test left behind, then seed fresh test data.
    commons.purge_redis_database(redis_db=redis_db, log=logger)
    commons.init_redis_test_data(im=ingestion_manager)
    yield  # the test runs here; no teardown work follows
Example #5
0
def test_integration(ingestion_endpoint, config_helper, logger,
                     white_list_file_path, redis_db, neo):
    """Full ingestion round-trip: POST a white-list request to the ingestion
    endpoint, assert the exact ordered stream of 14 monitoring events, then
    wait until the expected nodes appear in Neo4j.
    """
    # Event monitor in client mode — receives the events the server emits.
    monitor = Communicator(mode=CommunicatorMode.CLIENT)
    # Start from clean Redis and Neo4j stores.
    commons.purge_redis_database(redis_db=redis_db, log=logger)
    commons.purge_neo4j_database(log=logger, neo=neo)

    request_id = 'my_request_id'

    request_body = {
        'request_id': request_id,
        'request_type': 'white_list',
        'file_path': white_list_file_path
    }
    logger.info(
        f'Requesting the ingestion to begin with a path to a white-list at : {white_list_file_path}.'
    )
    reply = requests.post(ingestion_endpoint, json=request_body)
    logger.info(
        f'Ingestion request sent and replied with code of {reply.status_code}, text: {reply.text}'
    )
    assert reply.status_code == 200

    # Iterator over this request's events; stops if 90s pass without one.
    events = monitor.events_iterator(request_id=request_id, timeout_seconds=90)

    events_count = 0
    # The server is expected to emit exactly 14 events in this fixed order;
    # each branch below pins the type and payload of one position.
    for index, event in enumerate(events):
        events_count += 1
        logger.info(f'Received event: {event.event_type}')
        if index == 0:
            # Request arrived at the server.
            assert request_id in event.request_id
            assert event.event_type == GiraffeEventType.RECEIVED_REQUEST
            assert 'client_ip' in event.arguments.keys()
            assert 'request_content' in event.arguments.keys()
        elif index == 1:
            # Server began processing the request.
            assert event.event_type == GiraffeEventType.STARTED_PROCESSING_REQUEST
            assert request_id in event.request_id
            assert 'request_type' in event.arguments.keys()
            assert 'request_content' in event.arguments.keys()
            assert request_id in event.message
        elif index == 2:
            assert event.event_type == GiraffeEventType.FETCHING_DATA_AND_MODELS
            assert request_id in event.request_id
            assert 'request_id' in event.arguments.keys()
            assert 'source_description' in event.arguments.keys()
        elif index == 3:
            assert event.event_type == GiraffeEventType.FINISHED_FETCHING_DATA_AND_MODELS
            assert 'request_id' in event.arguments.keys()
        elif index == 4 or index == 5:
            # Two writes into Redis — presumably one per data source; confirm
            # against the server's source configuration.
            assert event.event_type == GiraffeEventType.WRITING_GRAPH_ELEMENTS_INTO_REDIS
            assert request_id in event.request_id
            assert 'request_id' in event.arguments.keys()
            assert 'source_name' in event.arguments.keys()
        elif index == 6:
            assert event.event_type == GiraffeEventType.REDIS_IS_READY_FOR_CONSUMPTION
            assert request_id in event.request_id
            assert 'request_id' in event.arguments.keys()
            assert 'parallel_results' in event.arguments.keys()
        elif index == 7 or index == 10:
            # Redis→Neo4j write phase begins (once per source).
            assert event.event_type == GiraffeEventType.WRITING_FROM_REDIS_TO_NEO
        elif index == 8 or index == 11:
            assert event.event_type == GiraffeEventType.PUSHED_GRAPH_ELEMENTS_INTO_NEO
            assert request_id in event.request_id
        elif index == 9 or index == 12:
            # Consumed Redis keys are deleted after each push into Neo4j.
            assert event.event_type == GiraffeEventType.DELETING_REDIS_KEYS
            assert request_id in event.request_id
            assert 'request_id' in event.arguments.keys()
            assert 'keys' in event.arguments.keys()
            # Shorten the wait for the remaining (fast) events.
            monitor.set_client_timeout_seconds(30)
        elif index == 13:
            # Terminal event for the request.
            assert event.event_type == GiraffeEventType.DONE_PROCESSING_REQUEST
            assert request_id in event.request_id
            assert 'request_id' in event.arguments.keys()
    assert events_count == 14

    secs = 1
    waiting_iterations = 90

    def nodes_found_in_neo4j() -> bool:
        # Polling predicate: true once all expected MockPerson nodes exist.
        query = 'MATCH (n:MockPerson) RETURN COUNT(*) AS count'
        count = neo.pull_query(query=query).value()[0]
        return count == 20  # 10 for each of (source-1, source-2)

    # Poll Neo4j up to secs * waiting_iterations seconds for the nodes.
    expected_nodes_found_in_neo = timing_utils.wait_for(
        condition=nodes_found_in_neo4j,
        condition_name='Nodes-In-Neo4j',
        sec_sleep=secs,
        retries=waiting_iterations,
        logger=logger)
    if not expected_nodes_found_in_neo:
        pytest.fail(
            f'Neo4j does not seem to contain the expected nodes after {secs * waiting_iterations} seconds.'
        )

    logger.info('Cool — values are finally in Neo4j.')
Example #6
0
def test_integration(ingestion_endpoint, config_helper, logger,
                     white_list_file_path, redis_db, neo):
    """Full ingestion round-trip using the fetch-one-event-at-a-time monitor
    API: POST a white-list request, assert each monitoring event in order,
    then wait until the expected nodes appear in Neo4j.
    """
    monitor = Communicator()
    # Explicit client start — this Communicator variant is not constructed
    # in client mode.
    monitor.start_client()
    # Start from clean Redis and Neo4j stores.
    commons.purge_redis_database(redis_db=redis_db, log=logger)
    commons.purge_neo4j_database(log=logger, neo=neo)

    request_id = 'my_request_id'

    request_body = {
        'request_id': request_id,
        'request_type': 'white_list',
        'file_path': white_list_file_path
    }
    logger.info(
        f'Requesting the ingestion to begin with a path to a white-list at : {white_list_file_path}.'
    )
    reply = requests.post(ingestion_endpoint, json=request_body)
    logger.info(
        f'Ingestion request sent and replied with code of {reply.status_code}, text: {reply.text}'
    )
    assert reply.status_code == 200

    # First event: the request reached the server.
    event: GiraffeEvent = monitor.fetch_event()
    assert event is not None  # Means we did not time-out on fetching.
    assert event.request_id is None  # Still unparsed.
    assert event.event_type == GiraffeEventType.GENERAL_EVENT
    assert 'client_ip' in event.arguments.keys()
    assert 'request_content' in event.arguments.keys()

    # Server began processing the request.
    event = monitor.fetch_event()
    assert event is not None
    assert event.event_type == GiraffeEventType.STARTED
    assert request_id in event.request_id
    assert 'request_type' in event.arguments.keys()
    assert 'request_content' in event.arguments.keys()
    assert request_id in event.message

    event = monitor.fetch_event()
    assert event is not None
    assert event.event_type == GiraffeEventType.FETCHING_DATA_AND_MODELS
    assert request_id in event.request_id
    assert 'request_id' in event.arguments.keys()
    assert 'source_description' in event.arguments.keys()

    # One Redis-write event per data source.
    for _ in range(0, 2):  # Source-1, Source-2
        event = monitor.fetch_event()
        assert event is not None
        assert event.event_type == GiraffeEventType.WRITING_GRAPH_ELEMENTS_INTO_REDIS
        assert request_id in event.request_id
        assert 'request_id' in event.arguments.keys()
        assert 'source_name' in event.arguments.keys()

    # Filling Redis can take a while — allow a longer wait for this event.
    monitor.set_client_timeout_seconds(60)
    event = monitor.fetch_event()
    assert event is not None
    assert event.event_type == GiraffeEventType.REDIS_IS_READY_FOR_CONSUMPTION
    assert request_id in event.request_id
    assert 'request_id' in event.arguments.keys()
    assert 'parallel_results' in event.arguments.keys()

    # Back to a shorter timeout for the remaining events.
    monitor.set_client_timeout_seconds(30)

    # Per source: a push into Neo4j followed by deletion of the consumed keys.
    for _ in range(0, 2):  # Source-1, Source-2
        event = monitor.fetch_event()
        assert event is not None
        assert event.event_type == GiraffeEventType.PUSHED_GRAPH_ELEMENTS_INTO_NEO
        assert request_id in event.request_id

        event = monitor.fetch_event()
        assert event is not None
        assert event.event_type == GiraffeEventType.DELETING_REDIS_KEYS
        assert request_id in event.request_id
        assert 'request_id' in event.arguments.keys()
        assert 'keys' in event.arguments.keys()

    # Terminal event for the request.
    event = monitor.fetch_event()
    assert event is not None
    assert event.event_type == GiraffeEventType.DONE_PROCESSING_REQUEST
    assert request_id in event.request_id
    assert 'request_id' in event.arguments.keys()

    secs = 1
    waiting_iterations = 90

    def nodes_found_in_neo4j() -> bool:
        # Polling predicate: true once all expected MockPerson nodes exist.
        query = 'MATCH (n:MockPerson) RETURN COUNT(*) AS count'
        count = neo.pull_query(query=query).value()[0]
        return count == 20  # 10 for each of (source-1, source-2)

    # Poll Neo4j up to secs * waiting_iterations seconds for the nodes.
    expected_nodes_found_in_neo = timing_utils.wait_for(
        condition=nodes_found_in_neo4j,
        condition_name='Nodes-In-Neo4j',
        sec_sleep=secs,
        retries=waiting_iterations,
        logger=logger)
    if not expected_nodes_found_in_neo:
        pytest.fail(
            f'Neo4j does not seem to contain the expected nodes after {secs * waiting_iterations} seconds.'
        )

    logger.info('Cool — values are finally in Neo4j.')