Example #1
0
    def handle(self, *args, **options):
        """Entry point for the management command: build config from CLI options,
        ensure the backing ETL SQL view exists, then run the full ES load pipeline.
        """
        es_client = instantiate_elasticsearch_client()
        config = process_cli_parameters(options, es_client)

        started_at = perf_counter()
        printf({"msg": f"Starting script\n{'=' * 56}"})
        banner = "target index: {index_name} | FY(s): {fiscal_years} | Starting from: {starting_date}"
        printf({"msg": banner.format(**config)})

        # The ETL reads from a SQL view; (re)create the one matching the load type
        load_type = config["load_type"]
        if load_type == "transactions":
            ensure_view_exists(settings.ES_TRANSACTIONS_ETL_VIEW_NAME)
        elif load_type == "awards":
            ensure_view_exists(settings.ES_AWARDS_ETL_VIEW_NAME)

        loader = Rapidloader(config, es_client)
        loader.run_load_steps()
        loader.complete_process()

        divider = "---------------------------------------------------------------"
        printf({"msg": divider})
        printf({"msg": f"Script completed in {perf_counter() - started_at:.2f}s"})
        printf({"msg": divider})
def test_incremental_load_into_award_index(award_data_fixture,
                                           elasticsearch_award_index,
                                           monkeypatch):
    """Test the ``elasticsearch_loader`` django management command to incrementally load updated data into the awards ES
    index from the DB, overwriting the doc that was already there
    """
    db_award_count = Award.objects.count()
    elasticsearch_award_index.update_index()
    es_client = elasticsearch_award_index.client  # type: Elasticsearch
    index_name = elasticsearch_award_index.index_name

    # Baseline: index exists and holds one doc per DB award
    assert es_client.indices.exists(index_name)
    assert es_client.count(index=index_name)["count"] == db_award_count

    # Suppress delete processing for this run so only the incremental-load path is exercised
    elasticsearch_award_index.etl_config["process_deletes"] = False
    elasticsearch_award_index.etl_config["start_datetime"] = datetime.now(timezone.utc)
    es_etl_config = _process_es_etl_test_config(es_client, elasticsearch_award_index)

    # Mutate a single DB record so the incremental load has something to pick up
    modified_award = Award.objects.first()  # type: Award
    modified_award.total_obligation = 9999
    modified_award.save()

    # Use a mock SQL runner so the ETL shares the test DB connection + transaction.
    # Patch the name in the module that imports it, not where it is defined.
    monkeypatch.setattr(
        "usaspending_api.etl.elasticsearch_loader_helpers.extract_data.execute_sql_statement",
        mock_execute_sql)
    # Keep the config object consistent with the patched module attribute
    es_etl_config["execute_sql_func"] = mock_execute_sql

    ensure_view_exists(es_etl_config["sql_view"], force=True)
    loader = Controller(es_etl_config)
    assert loader.__class__.__name__ == "Controller"
    loader.prepare_for_etl()
    loader.dispatch_tasks()
    es_client.indices.refresh(index_name)

    # Doc count is unchanged — the updated award overwrote its existing doc
    assert es_client.indices.exists(index_name)
    assert es_client.count(index=index_name)["count"] == db_award_count

    hits = es_client.search(index=index_name)["hits"]["hits"]
    matching = [doc for doc in hits if doc["_source"]["award_id"] == modified_award.id]
    assert int(matching[0]["_source"]["total_obligation"]) == 9999
Example #3
0
    def handle(self, *args, **options):
        """Entry point for the management command: optionally run deletes, run the
        ETL load, and always log the run duration.

        Exits with status 1 on any failure; may exit 3 on success to signal the
        pipeline that the job needs attention.
        """
        es_client = instantiate_elasticsearch_client()
        config = parse_cli_args(options, es_client)

        started_at = perf_counter()
        logger.info(format_log(f"Starting script\n{'=' * 56}"))
        banner = "target index: {index_name} | Starting from: {starting_date}"
        logger.info(format_log(banner.format(**config)))

        ensure_view_exists(config["sql_view"], force=True)
        error_addition = ""
        loader = Controller(config)

        if config["is_incremental_load"]:
            # Turned back on at end. NOTE(review): presumably stays off if an
            # exception aborts the run — confirm that is acceptable.
            toggle_refresh_off(es_client, config["index_name"])

        try:
            if config["process_deletes"]:
                loader.run_deletes()
            if not config["deletes_only"]:
                loader.prepare_for_etl()
                loader.dispatch_tasks()
        except Exception as e:
            logger.error(str(e))
            error_addition = "before encountering a problem during execution.... "
            raise SystemExit(1)
        else:
            loader.complete_process()
            if config["drop_db_view"]:
                logger.info(format_log(f"Dropping SQL view '{config['sql_view']}'"))
                drop_etl_view(config["sql_view"], True)
        finally:
            # Runs on both success and failure (including the SystemExit above)
            msg = f"Script duration was {perf_counter() - started_at:.2f}s {error_addition}|"
            headers = f"{'-' * (len(msg) - 2)} |"
            for line in (headers, msg, headers):
                logger.info(format_log(line))

        # Used to help pipeline determine when job passed but needs attention
        if config["raise_status_code_3"]:
            raise SystemExit(3)
Example #4
0
def django_db_setup(
    request,
    django_test_environment,
    django_db_blocker,
    django_db_use_migrations,
    django_db_keepdb,
    django_db_createdb,
    django_db_modify_db_settings,
):
    """Override of ``django_db_setup`` from the https://github.com/pytest-dev/pytest-django plugin
    (file /pytest-django/fixtures.py).

    Because this "hides" the original implementation, it may drift as the plugin is upgraded. It is
    based on pytest-django Release 3.5.1 and extends the original to also create materialized views
    (and related SQL views/functions) as part of test-database setup.

    If requirements.txt pins a different plugin version than the one this is based on: compare,
    update, and test. More work could be put into patching, replacing, or wrapping
    ``django.test.utils.setup_databases``, which is the actual function that needs extending.
    """
    from pytest_django.compat import setup_databases, teardown_databases
    from pytest_django.fixtures import _disable_native_migrations

    if not django_db_use_migrations:
        _disable_native_migrations()

    setup_kwargs = {}
    if django_db_keepdb and not django_db_createdb:
        setup_kwargs["keepdb"] = True

    with django_db_blocker.unblock():
        db_cfg = setup_databases(
            verbosity=request.config.option.verbose, interactive=False, **setup_kwargs
        )
        if not django_db_use_migrations:
            # If migrations are skipped, assume matviews and views are not to be (re)created either
            logger.warning(
                "Skipping generation of materialized views or other views in this test run because migrations are also "
                "being skipped. ")
        else:
            # Other scenarios (such as reuse or keep DB) may still lead to creation of a
            # non-existent DB, so views must be (re)created under those conditions
            generate_matviews(materialized_views_as_traditional_views=True)
            ensure_view_exists(settings.ES_TRANSACTIONS_ETL_VIEW_NAME)
            ensure_view_exists(settings.ES_AWARDS_ETL_VIEW_NAME)
            ensure_business_categories_functions_exist()
            call_command("load_broker_static_data")

    def _teardown_database():
        # Best-effort teardown; surface failures as pytest warnings rather than errors
        with django_db_blocker.unblock():
            try:
                teardown_databases(db_cfg, verbosity=request.config.option.verbose)
            except Exception as exc:
                request.node.warn(
                    pytest.PytestWarning(
                        "Error when trying to teardown test databases: %r" % exc))

    if not django_db_keepdb:
        request.addfinalizer(_teardown_database)