def _perform_load(self):
        """
        Stage the input file, then synchronize the destination table against it
        (delete obsolete, update changed, insert missing rows).

        Returns the total number of rows changed (deleted + updated + inserted).

        Raises RuntimeError when the change count exceeds MAX_CHANGE_PERCENT of
        the current record count and --force was not supplied.
        NOTE(review): the threshold check runs *after* the changes were applied;
        presumably intentional (the error just tells the operator to re-run with
        --force) — confirm there is a surrounding transaction that rolls back.
        """
        self._create_staging_table()
        self._import_input_file()

        # Natural key used to match staged rows against existing destination rows.
        natural_key_columns = ["feature_id", "state_alpha", "county_sequence", "county_numeric"]
        destination_table = ETLTable(self.destination_table_name, key_overrides=natural_key_columns)
        source_table = ETLTemporaryTable(self.staging_table_name)

        with Timer("Delete obsolete rows"):
            deleted_count = operations.delete_obsolete_rows(source_table, destination_table)
            logger.info(f"{deleted_count:,} rows deleted")

        with Timer("Update changed rows"):
            updated_count = operations.update_changed_rows(source_table, destination_table)
            logger.info(f"{updated_count:,} rows updated")

        with Timer("Insert missing rows"):
            inserted_count = operations.insert_missing_rows(source_table, destination_table)
            logger.info(f"{inserted_count:,} rows inserted")

        with Timer("Remove staging table"):
            execute_dml_sql(f'drop table if exists "{self.staging_table_name}"')

        # Sanity check: refuse unusually large changes unless explicitly forced.
        change_count = deleted_count + updated_count + inserted_count
        current_record_count = CityCountyStateCode.objects.count()
        max_change_threshold = int(current_record_count * MAX_CHANGE_PERCENT / 100.0)
        if current_record_count > 0 and change_count > max_change_threshold and not self.force:
            raise RuntimeError(
                f"Exceeded maximum number of allowed changes {max_change_threshold:,} ({MAX_CHANGE_PERCENT}%).  "
                f"Use --force switch if this was intentional."
            )

        return change_count
def vacuum_tables():
    """ Run a full vacuum/analyze over the core financial tables, logging each one's timing. """
    for table_name in (
        "submission_attributes",
        "appropriation_account_balances",
        "financial_accounts_by_program_activity_object_class",
        "financial_accounts_by_awards",
    ):
        with OneLineTimer(f"Vacuum {table_name}") as timer:
            execute_dml_sql(f'vacuum (full, analyze) "{table_name}"')
        logger.info(timer.success_message)
def identify_new_or_updated(source: ETLObjectBase,
                            destination: ETLWritableObjectBase,
                            staging: ETLTemporaryTable) -> int:
    """
    Build a temporary staging table holding the key columns of every source row
    that is either absent from destination or differs from it in at least one
    changeable column.  Returns the number of rows written to the staging table.
    """
    key_columns = destination.key_columns

    # Destination columns present in source that are neither overridden nor keys.
    changeable_columns = _get_changeable_columns(source, destination)

    template = """
        create temporary table {staging_object_representation} as
        select {select_columns}
        from   {source_object_representation} as s
               left outer join {destination_object_representation} as d on {join}
        where  ({excluder}) or ({detect_changes})
    """

    statement = SQL(template).format(
        staging_object_representation=staging.object_representation,
        source_object_representation=source.object_representation,
        destination_object_representation=destination.object_representation,
        select_columns=primatives.make_column_list(
            [column.name for column in key_columns], "s"),
        join=primatives.make_join_conditional(key_columns, "d", "s"),
        excluder=primatives.make_join_excluder_conditional(key_columns, "d"),
        detect_changes=primatives.make_change_detector_conditional(
            changeable_columns, "s", "d"),
    )

    return sql_helpers.execute_dml_sql(statement)
def update_changed_rows(source: ETLObjectBase,
                        destination: ETLWritableObjectBase) -> int:
    """
    Overwrite destination rows whose changeable columns differ from the matching
    source row (matched on the destination's key columns).  Returns the number
    of rows updated.
    """
    # Columns we are allowed to write: shared with source or overridden, never keys.
    settable_columns = _get_settable_columns(source, destination)

    # Columns compared to decide whether a row actually changed (shared, not
    # overridden, not keys).
    changeable_columns = _get_changeable_columns(source, destination)

    template = """
        update      {destination_object_representation} as d
        set         {set}
        from        {source_object_representation} as s
        where       {where} and ({detect_changes})
    """

    statement = SQL(template).format(
        destination_object_representation=destination.object_representation,
        source_object_representation=source.object_representation,
        set=primatives.make_column_setter_list(
            settable_columns, "s", destination.update_overrides),
        where=primatives.make_join_conditional(
            destination.key_columns, "s", "d"),
        detect_changes=primatives.make_change_detector_conditional(
            changeable_columns, "s", "d"),
    )

    return sql_helpers.execute_dml_sql(statement)
def insert_missing_rows(source: ETLObjectBase,
                        destination: ETLWritableObjectBase) -> int:
    """
    Copy into destination every source row whose key has no match in
    destination.  Returns the number of rows inserted.
    """
    # Destination columns that are either shared with source or overridden.
    insertable_columns = _get_shared_columns(
        source.columns + list(destination.insert_overrides),
        destination.columns)

    template = """
        insert into {destination_object_representation} ({insert_columns})
        select      {select_columns}
        from        {source_object_representation} as s
                    left outer join {destination_object_representation} as d on {join}
        where       {excluder}
    """

    statement = SQL(template).format(
        destination_object_representation=destination.object_representation,
        source_object_representation=source.object_representation,
        insert_columns=primatives.make_column_list(insertable_columns),
        select_columns=primatives.make_column_list(
            insertable_columns, "s", destination.insert_overrides),
        join=primatives.make_join_conditional(destination.key_columns, "d", "s"),
        excluder=primatives.make_join_excluder_conditional(
            destination.key_columns, "d"),
    )

    return sql_helpers.execute_dml_sql(statement)
 def _create_staging_table(self):
     """
     (Re)create the temporary staging table as a structural clone of the
     destination table, then drop the surrogate "id" column so it matches the
     input file's shape.
     """
     ddl_statements = (
         f'drop table if exists "{self.staging_table_name}"',
         f'create temporary table "{self.staging_table_name}" (like "{self.destination_table_name}" including all)',
         f'alter table "{self.staging_table_name}" drop column "id"',
     )
     with Timer("Create temporary staging table"):
         for statement in ddl_statements:
             execute_dml_sql(statement)
# Example 7 — listing separator carried over from the original source; not code.
def update_treasury_appropriation_account_agencies():
    """
    Updates the awarding and funding toptier agencies in the TreasuryAppropriationAccount table.

    For ATA:
        - If awarding_toptier_agency_id is for a DOD subsumed agency (Army, Navy, Air Force), remap it to 097 (DOD).
        - Otherwise attempt to look up the awarding agency by allocation_transfer_agency_id (ATA) CGAC.  Nothing else
          we can really do here.

    For AID:
        - At the request of the PO, in order to reduce the amount of one-off logic caused by several edge cases
          we bucket all treasury accounts in the same bucket as their "parent" federal account.  This will also
          make the filter tree control experience better as we should no longer end up with some outlying
          treasury accounts not falling under their apparent parent federal account in the tree control.
        - Please look at the FEDERAL_ACCOUNT_PARENT_AGENCY_MAPPING documentation for more details.

    Returns the number of rows updated (only rows whose awarding or funding
    toptier agency actually changes are touched, via "is distinct from").
    """
    # NOTE(review): DOD_SUBSUMED_AIDS is interpolated directly into the SQL —
    # presumably a pre-formatted tuple literal of quoted CGAC codes; confirm at
    # its definition site.
    sql = f"""
        with
        ata_mapping as (
            select
                taa.treasury_account_identifier,
                ata.toptier_agency_id as awarding_toptier_agency_id
            from
                treasury_appropriation_account as taa
                left outer join toptier_agency as ata on
                    ata.toptier_code = case
                        when taa.allocation_transfer_agency_id in {DOD_SUBSUMED_AIDS} then '{DOD_AID}'
                        when taa.allocation_transfer_agency_id is not null THEN taa.allocation_transfer_agency_id
                        else taa.agency_id
                    end
        ),
        aid_mapping as (
            {FEDERAL_ACCOUNT_PARENT_AGENCY_MAPPING}
        )
        update
            treasury_appropriation_account as taa
        set
            awarding_toptier_agency_id = ata_mapping.awarding_toptier_agency_id,
            funding_toptier_agency_id = aid_mapping.parent_toptier_agency_id
        from
            ata_mapping,
            aid_mapping
        where
            ata_mapping.treasury_account_identifier = taa.treasury_account_identifier and
            aid_mapping.agency_identifier = taa.agency_id and
            aid_mapping.main_account_code = taa.main_account_code and (
                ata_mapping.awarding_toptier_agency_id is distinct from taa.awarding_toptier_agency_id or
                aid_mapping.parent_toptier_agency_id is distinct from taa.funding_toptier_agency_id
            )
    """

    return execute_dml_sql(sql)
def add_submission_ids(submission_ids: List[int]) -> int:
    """
    Queue the given submission ids in the READY state, silently skipping any id
    already present in the queue (on-conflict-do-nothing).  Returns the number
    of submission ids actually added.
    """
    if not submission_ids:
        return 0

    value_rows = [f"({i}, '{DABSLoaderQueue.READY}', false)" for i in submission_ids]
    values = ", ".join(value_rows)

    sql = f"""
        insert into {DABSLoaderQueue._meta.db_table} (submission_id, state, force_reload) (
            values {values}
        ) on conflict (submission_id) do nothing
    """

    return execute_dml_sql(sql)
def delete_obsolete_rows(source: ETLObjectBase,
                         destination: ETLWritableObjectBase) -> int:
    """
    Remove every destination row whose key columns have no matching row in
    source (anti-join via "not exists").  Returns the number of rows deleted.
    """
    template = """
        delete from {destination_object_representation}
        where not exists (
            select from {source_object_representation} s where {join}
        )
    """

    destination_representation = destination.object_representation
    statement = SQL(template).format(
        destination_object_representation=destination_representation,
        source_object_representation=source.object_representation,
        join=primatives.make_join_to_table_conditional(
            destination.key_columns, "s", destination_representation),
    )

    return sql_helpers.execute_dml_sql(statement)
def stage_table(source: ETLObjectBase, destination: ETLWritableObjectBase,
                staging: ETLTemporaryTable) -> int:
    """
    Create the staging table as a copy of source, keeping only the columns that
    source and destination have in common.  Returns the number of rows copied.
    """
    columns_to_copy = _get_shared_columns(source.columns, destination.columns)

    template = """
        create temporary table {staging_object_representation} as
        select {select_columns}
        from   {source_object_representation} as t
    """

    statement = SQL(template).format(
        staging_object_representation=staging.object_representation,
        select_columns=primatives.make_column_list(columns_to_copy),
        source_object_representation=source.object_representation,
    )

    return sql_helpers.execute_dml_sql(statement)
# Example 11 — listing separator carried over from the original source; not code.
def update_federal_account_agency():
    """
    Re-parent each federal account to the toptier agency produced by
    FEDERAL_ACCOUNT_PARENT_AGENCY_MAPPING (matched on agency_identifier +
    main_account_code), touching only rows whose parent_toptier_agency_id
    actually differs ("is distinct from" is null-safe).  Returns the number
    of rows updated.
    """

    sql = f"""
        with
        federal_account_parent_agency_mapping as (
            {FEDERAL_ACCOUNT_PARENT_AGENCY_MAPPING}
        )
        update
            federal_account as fa
        set
            parent_toptier_agency_id = fapam.parent_toptier_agency_id
        from
            federal_account_parent_agency_mapping as fapam
        where
            fapam.agency_identifier = fa.agency_identifier and
            fapam.main_account_code = fa.main_account_code and
            fapam.parent_toptier_agency_id is distinct from fa.parent_toptier_agency_id
    """

    return execute_dml_sql(sql)
# Example 12 — listing separator carried over from the original source; not code.
def upsert_records_with_predicate(source: ETLObjectBase,
                                  destination: ETLWritableObjectBase,
                                  predicate: list, primary_key: str) -> int:
    """
    Insert rows from a predicate-filtered view of source into destination,
    overwriting any row that already exists on primary_key (insert ... on
    conflict do update).  Returns the number of rows affected.
    """
    # Destination columns that are either shared with source or overridden.
    insertable_columns = _get_shared_columns(
        source.columns + list(destination.insert_overrides),
        destination.columns)

    # One "column = EXCLUDED.column" assignment per insertable column.
    assignments = []
    for field in insertable_columns:
        assignments.append(
            SQL("{dest} = {source}").format(
                dest=Identifier(field),
                source=SQL("EXCLUDED.") + Identifier(field),
            )
        )
    excluded = SQL(", ").join(assignments)

    upsert_sql_template = """
        INSERT INTO {destination_object_representation} ({insert_columns})
        SELECT      {select_columns}
        FROM        {source_object} AS {alias}
        ON CONFLICT ({primary_key}) DO UPDATE SET
        {excluded}
        RETURNING {primary_key}
    """

    alias = "s"
    statement = SQL(upsert_sql_template).format(
        destination_object_representation=destination.object_representation,
        source_object=source.object_representation_custom_predicate(predicate),
        alias=Identifier(alias),
        primary_key=Identifier(primary_key),
        insert_columns=primatives.make_column_list(insertable_columns),
        select_columns=primatives.make_column_list(
            insertable_columns, alias, destination.insert_overrides),
        excluded=excluded,
    )

    return sql_helpers.execute_dml_sql(statement)
def run_sql(timer_message, sql):
    """ Execute sql under a one-line timer, log the affected row count, and return it. """
    with OneLineTimer(timer_message) as timer:
        affected = execute_dml_sql(sql)
    timer.log_message(affected)
    return affected
 def _vacuum_tables(self):
     """ Full vacuum/analyze of the destination table to reclaim space after the load. """
     table = self.destination_table_name
     with Timer(f"Vacuum {table}"):
         execute_dml_sql(f'vacuum (full, analyze) "{table}"')
def run_sqls(sqls):
    """ Execute each item's .sql under a one-line timer named by .log, logging rows affected. """
    for item in sqls:
        with OneLineTimer(item.log) as timer:
            affected = execute_dml_sql(item.sql)
        # execute_dml_sql may return None for statements without a row count.
        suffix = f"... {affected:,} rows affected" if affected is not None else ""
        logger.info(timer.success_message + suffix)
# Example 16 — listing separator carried over from the original source; not code.
def update_subaward_city_county(table_name=Subaward._meta.db_table):
    """
    Updates pop_county_code, pop_county_name, pop_city_code, recipient_location_county_code,
    recipient_location_county_name, and recipient_location_city_code in the Subaward (or a Subaward-like)
    table.  Returns the count of rows affected.

    The address_info CTE picks one canonical row per (city name, state) from
    ref_city_county_state_code — Populated Place rows only, preferring the
    lowest county_sequence and the most recently edited/created record — and
    the update only touches rows where at least one of the six columns would
    actually change ("is distinct from" is null-safe).
    """
    # NOTE(review): the default table_name is evaluated once at import time
    # (standard Python default-argument semantics); fine as long as the
    # Subaward table name never changes at runtime.
    sql = f"""
        with
        address_info as (
            select distinct on (upper(feature_name), state_alpha)
                upper(feature_name) as feature_name,
                state_alpha,
                county_numeric,
                upper(county_name) as county_name,
                census_code

            from
                ref_city_county_state_code

            where
                feature_class = 'Populated Place' and
                coalesce(feature_name, '') !=  '' and
                coalesce(state_alpha, '') !=  ''

            order by
                upper(feature_name),
                state_alpha,
                county_sequence,
                coalesce(date_edited, date_created) desc,
                id desc
        )
        update
            "{table_name}" as s1

        set
            pop_county_code = pop.county_numeric,
            pop_county_name = pop.county_name,
            pop_city_code = pop.census_code,
            recipient_location_county_code = rec.county_numeric,
            recipient_location_county_name = rec.county_name,
            recipient_location_city_code = rec.census_code

        from
            "{table_name}" as s
            left outer join address_info as pop on
                pop.feature_name = s.pop_city_name and
                pop.state_alpha = s.pop_state_code
            left outer join address_info as rec on
                rec.feature_name = s.recipient_location_city_name and
                rec.state_alpha = s.recipient_location_state_code

        where
            s.id = s1.id and (
                pop.county_numeric is distinct from s.pop_county_code or
                pop.county_name is distinct from s.pop_county_name or
                pop.census_code is distinct from s.pop_city_code or
                rec.county_numeric is distinct from s.recipient_location_county_code or
                rec.county_name is distinct from s.recipient_location_county_name or
                rec.census_code is distinct from s.recipient_location_city_code
            )
    """

    return execute_dml_sql(sql)