Example #1
    def _add_contents(self):
        """
        Get all of the transactions present in the view and stuff them into the Elasticsearch index.
        The view is only needed to load the transactions into Elasticsearch, so it is dropped after each use.
        """
        view_sql_file = "award_delta_view.sql" if self.index_type == "awards" else "transaction_delta_view.sql"
        view_sql = (settings.APP_DIR / "database_scripts" / "etl" / view_sql_file).read_text()
        with connection.cursor() as cursor:
            cursor.execute(view_sql)
            if self.index_type == "transactions":
                view_name = settings.ES_TRANSACTIONS_ETL_VIEW_NAME
            else:
                view_name = settings.ES_AWARDS_ETL_VIEW_NAME
            cursor.execute(f"SELECT * FROM {view_name};")
            transactions = ordered_dictionary_fetcher(cursor)
            cursor.execute(f"DROP VIEW {view_name};")

        for transaction in transactions:
            self.client.index(
                self.index_name,
                self.doc_type,
                json.dumps(transaction, cls=DjangoJSONEncoder),
                transaction["{}_id".format(self.index_type[:-1])],
            )
        # Force newly added documents to become searchable.
        self.client.indices.refresh(self.index_name)
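Every example on this page feeds the cursor to ordered_dictionary_fetcher. A minimal sketch of such a helper, assuming it only zips the column names from cursor.description with each fetched row (the project's real helper may differ):

from collections import OrderedDict


def ordered_dictionary_fetcher(cursor):
    """Return all rows from a DB-API cursor as a list of ordered dictionaries."""
    columns = [column[0] for column in cursor.description]
    return [OrderedDict(zip(columns, row)) for row in cursor.fetchall()]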
Example #2
    def _add_contents(self, **options):
        """
        Get all of the transactions present in the view and stuff them into the Elasticsearch index.
        The view is only needed to load the transactions into Elasticsearch, so it is dropped after each use.
        """
        view_sql_file = "award_delta_view.sql" if self.index_type == "awards" else "transaction_delta_view.sql"
        view_sql = (settings.APP_DIR / "database_scripts" / "etl" / view_sql_file).read_text()
        with connection.cursor() as cursor:
            cursor.execute(view_sql)
            if self.index_type == "transactions":
                view_name = settings.ES_TRANSACTIONS_ETL_VIEW_NAME
            else:
                view_name = settings.ES_AWARDS_ETL_VIEW_NAME
            cursor.execute(f"SELECT * FROM {view_name};")
            transactions = ordered_dictionary_fetcher(cursor)
            cursor.execute(f"DROP VIEW {view_name};")

        # The routing key is loop-invariant, so resolve it once up front.
        routing_key = options.get("routing", settings.ES_ROUTING_FIELD)
        for transaction in transactions:
            routing_value = transaction.get(routing_key)
            # Special case: convert an array of JSON objects to an array of strings to avoid nested types.
            if self.index_type == "transactions":
                transaction["federal_accounts"] = self.convert_json_arrays_to_list(transaction["federal_accounts"])
            self.client.index(
                index=self.index_name,
                body=json.dumps(transaction, cls=DjangoJSONEncoder),
                id=transaction["{}_id".format(self.index_type[:-1])],
                routing=routing_value,
            )
        # Force newly added documents to become searchable.
        self.client.indices.refresh(self.index_name)
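Example #2 calls self.convert_json_arrays_to_list to flatten an array of JSON objects into an array of strings so Elasticsearch does not map the field as a nested type. A hypothetical sketch of such a method, since the actual implementation is not shown on this page:

import json


def convert_json_arrays_to_list(json_array):
    """Serialize each JSON object in the array to a plain string (assumed behavior)."""
    if json_array is None:
        return None
    return [json.dumps(item, sort_keys=True) for item in json_array]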
Example #3
    def _add_contents(self, **options):
        """
        Get all of the records present in the view and stuff them into the Elasticsearch index.
        The view is only needed to load the records into Elasticsearch, so it is dropped after each use.
        """
        if self.index_type == "award":
            view_sql_file = f"{settings.ES_AWARDS_ETL_VIEW_NAME}.sql"
            view_name = settings.ES_AWARDS_ETL_VIEW_NAME
            es_id = f"{self.index_type}_id"
        elif self.index_type == "covid19_faba":
            view_sql_file = f"{settings.ES_COVID19_FABA_ETL_VIEW_NAME}.sql"
            view_name = settings.ES_COVID19_FABA_ETL_VIEW_NAME
            es_id = "financial_account_distinct_award_key"
        elif self.index_type == "transaction":
            view_sql_file = f"{settings.ES_TRANSACTIONS_ETL_VIEW_NAME}.sql"
            view_name = settings.ES_TRANSACTIONS_ETL_VIEW_NAME
            es_id = f"{self.index_type}_id"
        else:
            raise ValueError(f"Invalid index type '{self.index_type}'")

        view_sql = (settings.APP_DIR / "database_scripts" / "etl" / view_sql_file).read_text()
        with connection.cursor() as cursor:
            cursor.execute(view_sql)
            cursor.execute(f"SELECT * FROM {view_name};")
            records = ordered_dictionary_fetcher(cursor)
            cursor.execute(f"DROP VIEW {view_name};")
            if self.index_type == "covid19_faba":
                records = transform_covid19_faba_data(
                    TaskSpec(
                        name="worker",
                        index=self.index_name,
                        sql=view_sql_file,
                        view=view_name,
                        base_table="financial_accounts_by_awards",
                        base_table_id="financial_accounts_by_awards_id",
                        field_for_es_id="financial_account_distinct_award_key",
                        primary_key="award_id",
                        partition_number=1,
                        is_incremental=False,
                    ),
                    records,
                )
        # The routing key is loop-invariant, so resolve it once up front.
        routing_key = options.get("routing", settings.ES_ROUTING_FIELD)
        for record in records:
            routing_value = record.get(routing_key)
            es_id_value = record.get(es_id)
            # Special case: convert an array of JSON objects to an array of strings to avoid nested types.
            if self.index_type == "transaction":
                record["federal_accounts"] = self.convert_json_arrays_to_list(record["federal_accounts"])
            if self.index_type == "covid19_faba":
                es_id_value = record.pop("_id")
            self.client.index(
                index=self.index_name,
                body=json.dumps(record, cls=DjangoJSONEncoder),
                id=es_id_value,
                routing=routing_value,
            )
        # Force newly added documents to become searchable.
        self.client.indices.refresh(self.index_name)
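Example #3 builds a TaskSpec to drive transform_covid19_faba_data. Judging only from the keyword arguments at the call site above, its definition could be a simple NamedTuple like this sketch; the project's actual TaskSpec may carry more fields or defaults:

from typing import NamedTuple


class TaskSpec(NamedTuple):
    # Field names inferred from the call site above; types are assumptions.
    name: str
    index: str
    sql: str
    view: str
    base_table: str
    base_table_id: str
    field_for_es_id: str
    primary_key: str
    partition_number: int
    is_incremental: bool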
Example #4
    def test_all_the_things(self):
        """
        Because we are using TransactionTestCase, we're going to run all of our tests in one method to
        prevent repeated setups and teardowns, which are expensive.  This is less than ideal, but we'll
        probably be fine.
        """

        # Cue fiery explosions.
        with self.assertRaises(CommandError):
            call_command("load_multiple_submissions")
        with self.assertRaises(CommandError):
            call_command("load_multiple_submissions", "--list-ids-only")

        # Load nothing.
        call_command("load_multiple_submissions", "--submission-ids", 1, 2, 3, "--list-ids-only")
        assert SubmissionAttributes.objects.count() == 0
        assert AppropriationAccountBalances.objects.count() == 0
        assert FinancialAccountsByProgramActivityObjectClass.objects.count() == 0
        assert FinancialAccountsByAwards.objects.count() == 0

        # Also load nothing.
        call_command("load_multiple_submissions", "--incremental", "--list-ids-only")
        assert SubmissionAttributes.objects.count() == 0
        assert AppropriationAccountBalances.objects.count() == 0
        assert FinancialAccountsByProgramActivityObjectClass.objects.count() == 0
        assert FinancialAccountsByAwards.objects.count() == 0

        # Load specific submissions.
        call_command("load_multiple_submissions", "--submission-ids", 1, 2, 3)
        assert SubmissionAttributes.objects.count() == 3
        assert AppropriationAccountBalances.objects.count() == 3
        assert FinancialAccountsByProgramActivityObjectClass.objects.count() == 5
        assert FinancialAccountsByAwards.objects.count() == 9

        # We'll need these later.
        update_date_sub_2 = SubmissionAttributes.objects.get(submission_id=2).update_date
        create_date_sub_3 = SubmissionAttributes.objects.get(submission_id=3).create_date

        # Load remaining submissions.
        call_command("load_multiple_submissions", "--incremental")
        assert SubmissionAttributes.objects.count() == 5
        assert AppropriationAccountBalances.objects.count() == 5
        assert FinancialAccountsByProgramActivityObjectClass.objects.count() == 7
        assert FinancialAccountsByAwards.objects.count() == 11

        # Now that we have everything loaded, let's make sure our data make sense.
        with connections[DEFAULT_DB_ALIAS].cursor() as cursor:
            cursor.execute("select * from submission_attributes where submission_id = 1")
            d = dict(ordered_dictionary_fetcher(cursor)[0])
            del d["create_date"]
            del d["update_date"]
            assert d == {
                "submission_id": 1,
                "certified_date": datetime(2000, 2, 1, 0, 0, tzinfo=timezone.utc),
                "toptier_code": "001",
                "reporting_period_start": date(2000, 1, 1),
                "reporting_period_end": date(2000, 3, 31),
                "reporting_fiscal_year": 2000,
                "reporting_fiscal_quarter": 2,
                "reporting_fiscal_period": 4,
                "quarter_format_flag": True,
                "reporting_agency_name": None,
                "published_date": datetime(2000, 1, 1, 0, 0, tzinfo=timezone.utc),
            }

            cursor.execute(
                """
                    select  sum(total_budgetary_resources_amount_cpe)
                    from    appropriation_account_balances
                """
            )
            assert cursor.fetchone()[0] == Decimal("165.00")

            cursor.execute(
                """
                    select  sum(gross_outlay_amount_by_program_object_class_cpe),
                            string_agg(disaster_emergency_fund_code, ',' order by disaster_emergency_fund_code)
                    from    financial_accounts_by_program_activity_object_class
                """
            )
            assert cursor.fetchone() == (Decimal("-52116.00"), "B,B,L,L")

            cursor.execute(
                """
                    select  sum(gross_outlay_amount_by_award_cpe),
                            sum(transaction_obligated_amount),
                            string_agg(disaster_emergency_fund_code, ',' order by disaster_emergency_fund_code)
                    from    financial_accounts_by_awards
                """
            )
            assert cursor.fetchone() == (Decimal("-521207.00"), Decimal("-5212070.00"), "B,B,L,L,L,L")

        # Nuke a submission.
        SubmissionAttributes.objects.filter(submission_id=1).delete()
        assert SubmissionAttributes.objects.count() == 4
        assert AppropriationAccountBalances.objects.count() == 4
        assert FinancialAccountsByProgramActivityObjectClass.objects.count() == 4
        assert FinancialAccountsByAwards.objects.count() == 8

        # Make sure it reloads.
        call_command("load_multiple_submissions", "--incremental")
        assert SubmissionAttributes.objects.count() == 5
        assert AppropriationAccountBalances.objects.count() == 5
        assert FinancialAccountsByProgramActivityObjectClass.objects.count() == 7
        assert FinancialAccountsByAwards.objects.count() == 11

        # Make a change to a submission.
        SubmissionAttributes.objects.filter(submission_id=1).update(reporting_fiscal_year=1999)
        assert SubmissionAttributes.objects.get(submission_id=1).reporting_fiscal_year == 1999

        # Make sure it reloads.
        call_command("load_multiple_submissions", "--incremental")
        assert SubmissionAttributes.objects.get(submission_id=1).reporting_fiscal_year == 2000

        # Nuke a submission.
        SubmissionAttributes.objects.filter(submission_id=1).delete()

        # Make it really old.
        with connections["data_broker"].cursor() as cursor:
            cursor.execute("update submission set updated_at = '1999-01-01' where submission_id = 1")

        # Make sure it DOESN'T reload.
        call_command("load_multiple_submissions", "--incremental")
        assert SubmissionAttributes.objects.count() == 4
        assert AppropriationAccountBalances.objects.count() == 4
        assert FinancialAccountsByProgramActivityObjectClass.objects.count() == 4
        assert FinancialAccountsByAwards.objects.count() == 8

        # Ok, after all the stuff we just did, let's make sure submissions 2 and 3 never got touched.
        assert SubmissionAttributes.objects.get(submission_id=2).update_date == update_date_sub_2
        assert SubmissionAttributes.objects.get(submission_id=3).create_date == create_date_sub_3

        # Now let's make sure submission 2 gets touched.
        call_command("load_multiple_submissions", "--submission-ids", 2)
        assert SubmissionAttributes.objects.get(submission_id=2).update_date > update_date_sub_2

        # Let's test the new certified_date change detection code.  But first, bring submission 1 back to the present.
        with connections["data_broker"].cursor() as cursor:
            cursor.execute("update submission set updated_at = now() where submission_id = 1")
        call_command("load_multiple_submissions", "--submission-ids", 1)

        # Everything is in sync.  Both lists should be empty.
        certified_only_submission_ids, load_submission_ids = LoadMultipleCommand.get_incremental_submission_ids()
        assert certified_only_submission_ids == []
        assert load_submission_ids == []

        # There should be only one certified_only difference.
        SubmissionAttributes.objects.filter(submission_id=3).update(certified_date="1999-01-01")
        certified_only_submission_ids, load_submission_ids = LoadMultipleCommand.get_incremental_submission_ids()
        assert [tuple(c) for c in certified_only_submission_ids] == [(3, datetime(2000, 2, 3))]
        assert load_submission_ids == []

        # There should be one certified_only difference and one reload.
        SubmissionAttributes.objects.filter(submission_id=1).update(published_date="1999-01-01")
        certified_only_submission_ids, load_submission_ids = LoadMultipleCommand.get_incremental_submission_ids()
        assert [tuple(c) for c in certified_only_submission_ids] == [(3, datetime(2000, 2, 3))]
        assert load_submission_ids == [1]

        # Fix everything.
        call_command("load_multiple_submissions", "--incremental")
        certified_only_submission_ids, load_submission_ids = LoadMultipleCommand.get_incremental_submission_ids()
        assert certified_only_submission_ids == []
        assert load_submission_ids == []

        # Confirm that submission 3 only received a certified_date change, not a reload.
        assert SubmissionAttributes.objects.get(submission_id=3).create_date == create_date_sub_3
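The last stretch of the test separates certified_date-only differences from full reloads. A self-contained sketch of that split, with every name below hypothetical rather than taken from LoadMultipleCommand's actual internals:

from datetime import datetime
from typing import List, NamedTuple, Tuple


class SubmissionPair(NamedTuple):
    # Hypothetical pairing of broker metadata with the locally loaded copy.
    submission_id: int
    broker_certified_date: datetime
    local_certified_date: datetime
    broker_published_date: datetime
    local_published_date: datetime


def split_incremental_ids(pairs: List[SubmissionPair]) -> Tuple[list, list]:
    certified_only, reload_ids = [], []
    for p in pairs:
        if p.broker_published_date != p.local_published_date:
            # A substantive difference forces a full reload.
            reload_ids.append(p.submission_id)
        elif p.broker_certified_date != p.local_certified_date:
            # A certified_date-only difference needs just a metadata touch-up.
            certified_only.append((p.submission_id, p.broker_certified_date))
    return certified_only, reload_ids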
Example #5
def dictfetchall(cursor):
    """
    Return all rows from the cursor as a list of dictionaries, short-circuiting
    for the PhonyCursor test double used in tests.
    """
    if isinstance(cursor, PhonyCursor):
        return cursor.results or []
    return ordered_dictionary_fetcher(cursor)
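dictfetchall special-cases PhonyCursor, which, from the attributes touched here, looks like a test double carrying canned rows in .results. A minimal stand-in, assuming nothing beyond what the function uses:

class PhonyCursor:
    """Hypothetical minimal test double; the real class may do more."""

    def __init__(self, results=None):
        self.results = results  # canned rows, already shaped as dictionaries


# Usage sketch: canned rows come back untouched; an empty double yields [].
assert dictfetchall(PhonyCursor([{"id": 1}])) == [{"id": 1}]
assert dictfetchall(PhonyCursor()) == []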