def nightly_loader(self, start_date):
        """Run the nightly FPDS data load.

        Args:
            start_date: optional "YYYY-MM-DD" string; when provided, process
                records starting from this date instead of the stored
                last-load date.
        """
        logger.info("==== Starting FPDS nightly data load ====")

        if start_date:
            date = datetime.strptime(start_date, "%Y-%m-%d").date()
        else:
            # Fall back to one day ago when no last-load date has been recorded.
            default_last_load_date = datetime.now(timezone.utc) - timedelta(days=1)
            date = get_last_load_date("fpds", default=default_last_load_date).date()
        # Captured before processing so records modified during this run are
        # re-examined by the next run.
        processing_start_datetime = datetime.now(timezone.utc)

        # Lazy %-style args: the logging module formats only if the record is emitted.
        logger.info("Processing data for FPDS starting from %s", date)

        with timer("retrieval of new/modified FPDS data ID list", logger.info):
            ids_to_insert = self.get_fpds_transaction_ids(date=date)

        with timer("retrieval of deleted FPDS IDs", logger.info):
            ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date)

        self.perform_load(
            ids_to_delete,
            ids_to_insert,
        )

        # Update the date for the last time the data load was run
        update_last_load_date("fpds", processing_start_datetime)

        logger.info("FPDS NIGHTLY UPDATE COMPLETE")
    def handle(self, *args, **options):
        """Update awarding/funding agencies for contracts (D1) or assistance (D2)."""
        logger.info('Starting updating awarding agencies...')

        fiscal_year = options.get('fiscal_year')[0]

        page_opt = options.get('page')
        limit_opt = options.get('limit')
        page = page_opt[0] if page_opt else 1
        limit = limit_opt[0] if limit_opt else 500000

        if options.get('contracts', None):
            label = 'D1 (contracts/FPDS) awarding/funding agencies updates'
            with timer(label, logger.info):
                self.update_awarding_funding_agency(fiscal_year, 'D1', page=page, limit=limit)
        elif options.get('assistance', None):
            label = 'D2 (assistance/FABS) awarding/funding agencies updates'
            with timer(label, logger.info):
                self.update_awarding_funding_agency(fiscal_year, 'D2', page=page, limit=limit)
        else:
            logger.error('Not a valid data type: --assistance,--contracts')

        logger.info('Finished')
Exemplo n.º 3
0
    def perform_load(self, ids_to_delete, ids_to_insert):
        """Delete stale FPDS records, insert new ones, then refresh awards and linkages."""
        if ids_to_delete:
            with timer("deletion of all stale FPDS data", logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info("No FPDS records to delete at this juncture")

        if not ids_to_insert:
            logger.info("No FPDS records to insert or modify at this juncture")
            return

        # Add FPDS records
        with timer("insertion of new FPDS data in batches", logger.info):
            self.insert_all_new_fpds(ids_to_insert)

        # Update Awards based on changed FPDS records
        with timer("updating awards to reflect their latest associated transaction info", logger.info):
            updated = update_awards(tuple(AWARD_UPDATE_ID_LIST))
            logger.info("{} awards updated from their transactional data".format(updated))

        # Update FPDS-specific Awards based on the info in child transactions
        with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
            updated = update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))
            logger.info("{} awards updated FPDS-specific and exec comp data".format(updated))

        # Check the linkages from file C to FPDS records and update any that are missing
        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("contract")
Exemplo n.º 4
0
def upsert_fabs_transactions(ids_to_upsert, externally_updated_award_ids):
    """Insert new FABS records, then refresh award rollups and C->D linkages."""
    if not (ids_to_upsert or externally_updated_award_ids):
        logger.info("Nothing to insert...")
        return

    update_award_ids = copy(externally_updated_award_ids)

    if ids_to_upsert:
        with timer("inserting new FABS data", logger.info):
            update_award_ids.extend(insert_all_new_fabs(ids_to_upsert))

    if update_award_ids:
        # Convert to tuple and remove duplicates.
        update_award_ids = tuple(set(update_award_ids))
        with timer("updating awards to reflect their latest associated transaction info", logger.info):
            award_record_count = update_awards(update_award_ids)
            logger.info("{} awards updated from their transactional data".format(award_record_count))
        with timer("updating awards with executive compensation data", logger.info):
            award_record_count = update_assistance_awards(update_award_ids)
            logger.info("{} awards updated FABS-specific and exec comp data".format(award_record_count))

    with timer("updating C->D linkages", logger.info):
        update_c_to_d_linkages("assistance")
Exemplo n.º 5
0
    def handle(self, *args, **options):
        """Update awarding/funding agencies for either contracts (D1) or assistance (D2)."""
        logger.info("Starting updating awarding agencies...")

        fiscal_year = options.get("fiscal_year")[0]

        raw_page = options.get("page")
        raw_limit = options.get("limit")
        page = raw_page[0] if raw_page else 1
        limit = raw_limit[0] if raw_limit else 500000

        if options.get("contracts", None):
            with timer("D1 (contracts/FPDS) awarding/funding agencies updates", logger.info):
                self.update_awarding_funding_agency(fiscal_year, "D1", page=page, limit=limit)
        elif options.get("assistance", None):
            with timer("D2 (assistance/FABS) awarding/funding agencies updates", logger.info):
                self.update_awarding_funding_agency(fiscal_year, "D2", page=page, limit=limit)
        else:
            logger.error("Not a valid data type: --assistance,--contracts")

        logger.info("Finished")
Exemplo n.º 6
0
    def handle(self, *args, **options):
        """Load FABS data, either incrementally or for explicit ids/date ranges."""
        # Captured up front so the next incremental run re-scans anything
        # modified while this load was in progress.
        processing_start_datetime = datetime.now(timezone.utc)

        logger.info("Starting FABS data load script...")

        # "Reload all" supersedes all other processing options.
        reload_all = options["reload_all"]
        if reload_all:
            ids = None
            afa_ids = None
            start_datetime = None
            end_datetime = None
        else:
            ids = options["ids"]
            afa_ids = set(options["afa_ids"])
            if options["afa_id_file"]:
                # Merge command-line afa ids with those read from file.
                afa_ids = tuple(
                    afa_ids | read_afa_ids_from_file(options["afa_id_file"]))
            start_datetime = options["start_datetime"]
            end_datetime = options["end_datetime"]

        # If no other processing options were provided then this is an incremental load.
        is_incremental_load = not any(
            (reload_all, ids, afa_ids, start_datetime, end_datetime))

        if is_incremental_load:
            start_datetime = get_incremental_load_start_datetime()
            logger.info(
                f"Processing data for FABS starting from {start_datetime} (includes offset)"
            )

            # We only perform deletes with incremental loads.
            with timer("obtaining delete records", logger.info):
                delete_records = retrieve_deleted_fabs_transactions(
                    start_datetime, end_datetime)
                # Flatten the per-key lists into one list, dropping falsy entries.
                ids_to_delete = [
                    item for sublist in delete_records.values()
                    for item in sublist if item
                ]
                ids_to_delete = get_delete_pks_for_afa_keys(ids_to_delete)
            logger.info(f"{len(ids_to_delete):,} delete ids found in total")

        with timer("retrieving IDs of FABS to process", logger.info):
            ids_to_upsert = get_fabs_transaction_ids(ids, afa_ids,
                                                     start_datetime,
                                                     end_datetime)

        # NOTE: ids_to_delete is only bound on the incremental path, so its use
        # here must stay guarded by is_incremental_load.
        update_award_ids = delete_fabs_transactions(
            ids_to_delete) if is_incremental_load else []
        upsert_fabs_transactions(ids_to_upsert, update_award_ids)

        if is_incremental_load:
            logger.info(
                f"Storing {processing_start_datetime} for the next incremental run"
            )
            update_last_load_date("fabs", processing_start_datetime)

        logger.info("FABS UPDATE FINISHED!")
    def handle(self, *args, **options):
        """Load FABS data by submission id, afa id file, or date range."""
        # Captured up front so the next incremental run re-scans anything
        # modified while this load was in progress.
        processing_start_datetime = datetime.now(timezone.utc)

        logger.info("Starting FABS data load script...")

        do_not_log_deletions = options["do_not_log_deletions"]

        # "Reload all" supersedes all other processing options.
        reload_all = options["reload_all"]
        if reload_all:
            submission_ids = None
            afa_ids = None
            start_datetime = None
            end_datetime = None
        else:
            submission_ids = tuple(options["submission_ids"]
                                   ) if options["submission_ids"] else None
            afa_ids = read_afa_ids_from_file(
                options['afa_id_file']) if options['afa_id_file'] else None
            start_datetime = options["start_datetime"]
            end_datetime = options["end_datetime"]

        # If no other processing options were provided then this is an incremental load.
        is_incremental_load = not any((reload_all, submission_ids, afa_ids,
                                       start_datetime, end_datetime))

        if is_incremental_load:
            # Incremental runs derive the submission set from the last load date.
            last_load_date = get_last_load_date()
            submission_ids = get_new_submission_ids(last_load_date)
            logger.info("Processing data for FABS starting from %s" %
                        last_load_date)

        if is_incremental_load and not submission_ids:
            logger.info("No new submissions. Exiting.")

        else:
            with timer("obtaining delete records", logger.info):
                ids_to_delete = get_fabs_records_to_delete(
                    submission_ids, afa_ids, start_datetime, end_datetime)

            with timer("retrieving/diff-ing FABS Data", logger.info):
                ids_to_upsert = get_fabs_transaction_ids(
                    submission_ids, afa_ids, start_datetime, end_datetime)

            update_award_ids = delete_fabs_transactions(
                ids_to_delete, do_not_log_deletions)
            upsert_fabs_transactions(ids_to_upsert, update_award_ids)

        # The last-load date advances even on the "no new submissions" path.
        if is_incremental_load:
            update_last_load_date("fabs", processing_start_datetime)

        logger.info("FABS UPDATE FINISHED!")
Exemplo n.º 8
0
    def handle(self, *args, **options):
        """Run the D1/D2 historical data load, then refresh award rollups."""
        logger.info("Starting historical data load...")

        db_cursor = connections["data_broker"].cursor()
        fy_opt = options.get("fiscal_year")
        page_opt = options.get("page")
        limit_opt = options.get("limit")

        if fy_opt:
            fiscal_year = fy_opt[0]
            logger.info("Processing data for Fiscal Year " + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page_opt[0] if page_opt else 1
        limit = limit_opt[0] if limit_opt else 500000

        if not options["assistance"]:
            with timer("D1 historical data load", logger.info):
                self.update_transaction_contract(
                    db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

        if not options["contracts"]:
            with timer("D2 historical data load", logger.info):
                self.update_transaction_assistance(
                    db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

        with timer("updating awards to reflect their latest associated transaction info", logger.info):
            update_awards(tuple(award_update_id_list))

        with timer("updating assistance-specific awards to reflect their latest transaction info", logger.info):
            update_assistance_awards(tuple(award_assistance_update_id_list))

        with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
            update_contract_awards(tuple(award_contract_update_id_list))

        # Done!
        logger.info("FINISHED")
Exemplo n.º 9
0
def test_timer(capsys):
    """Verify that timer helper executes without error"""

    with timer():
        print("Doing a thing")
    captured = capsys.readouterr()[0]
    for expected in ("Beginning", "finished"):
        assert expected in captured
Exemplo n.º 10
0
def test_timer_times(capsys):
    """Verify that timer shows longer times for slower operations"""

    pattern = re.compile(r"([\d\.e\-]+)s")

    def elapsed_seconds():
        # Pull the reported duration out of the captured timer output.
        return float(pattern.search(capsys.readouterr()[0]).group(1))

    with timer():
        print("Doing a thing")
    fast = elapsed_seconds()

    with timer():
        print("Doing a slower thing")
        time.sleep(0.1)
    slow = elapsed_seconds()

    assert slow > fast
    def handle(self, *args, **options):
        """Run the D1/D2 historical data load, then refresh awards and categories."""
        logger.info('Starting historical data load...')

        db_cursor = connections['data_broker'].cursor()
        fy_opt = options.get('fiscal_year')
        page_opt = options.get('page')
        limit_opt = options.get('limit')

        if fy_opt:
            fiscal_year = fy_opt[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page_opt[0] if page_opt else 1
        limit = limit_opt[0] if limit_opt else 500000

        if not options['assistance']:
            with timer('D1 historical data load', logger.info):
                self.update_transaction_contract(
                    db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

        if not options['contracts']:
            with timer('D2 historical data load', logger.info):
                self.update_transaction_assistance(
                    db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
            update_contract_awards(tuple(award_contract_update_id_list))

        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))

        # Done!
        logger.info('FINISHED')
Exemplo n.º 12
0
    def handle(self, *args, **options):
        """Load FABS data incrementally or for an explicit afa-id/date selection."""
        # Captured up front so the next incremental run covers this run's window.
        processing_start_datetime = datetime.now(timezone.utc)

        logger.info("Starting FABS data load script...")

        # "Reload all" supersedes all other processing options.
        reload_all = options["reload_all"]
        afa_ids = start_datetime = end_datetime = None
        if not reload_all:
            if options["afa_id_file"]:
                afa_ids = read_afa_ids_from_file(options["afa_id_file"])
            start_datetime = options["start_datetime"]
            end_datetime = options["end_datetime"]

        # If no other processing options were provided then this is an incremental load.
        is_incremental_load = not any(
            (reload_all, afa_ids, start_datetime, end_datetime))

        if is_incremental_load:
            start_datetime = get_last_load_date()
            logger.info("Processing data for FABS starting from %s" %
                        start_datetime)

        with timer("obtaining delete records", logger.info):
            delete_records = retrieve_deleted_fabs_transactions(
                start_datetime, end_datetime)
            # Flatten the per-key lists into one list, dropping falsy entries.
            ids_to_delete = [
                fabs_id
                for id_list in delete_records.values()
                for fabs_id in id_list
                if fabs_id
            ]

        with timer("retrieving/diff-ing FABS Data", logger.info):
            ids_to_upsert = get_fabs_transaction_ids(afa_ids, start_datetime, end_datetime)

        update_award_ids = delete_fabs_transactions(ids_to_delete)
        upsert_fabs_transactions(ids_to_upsert, update_award_ids)

        if is_incremental_load:
            update_last_load_date("fabs", processing_start_datetime)

        logger.info("FABS UPDATE FINISHED!")
Exemplo n.º 13
0
    def handle(self, *args, **options):
        """Refresh award rollup data, either for every record or for one fiscal year."""
        logger.info('Starting updates to award data...')

        all_records_flag = options.get('all')
        fiscal_year = options.get('fiscal_year')

        award_update_id_list = []
        award_contract_update_id_list = []

        if not all_records_flag:
            if fiscal_year:
                fiscal_year = fiscal_year[0]
                logger.info('Processing data for Fiscal Year ' +
                            str(fiscal_year))
            else:
                fiscal_year = 2017

            # Lists to store for update_awards and update_contract_awards
            award_update_id_list = TransactionNormalized.objects.filter(
                action_date__fy=fiscal_year).values_list('award_id', flat=True)
            award_contract_update_id_list = TransactionFPDS.objects.filter(
                action_date__fy=fiscal_year).values_list('transaction__award_id', flat=True)

        with timer(
                'updating awards to reflect their latest associated transaction info',
                logger.info):
            if all_records_flag:
                update_awards()
            else:
                update_awards(tuple(award_update_id_list))

        with timer(
                'updating contract-specific awards to reflect their latest transaction info...',
                logger.info):
            if all_records_flag:
                update_contract_awards()
            else:
                update_contract_awards(tuple(award_contract_update_id_list))

        with timer('updating award category variables', logger.info):
            if all_records_flag:
                update_award_categories()
            else:
                update_award_categories(tuple(award_update_id_list))

        # Done!
        logger.info('FINISHED')
def delete_fabs_transactions(ids_to_delete):
    """ids_to_delete are published_award_financial_assistance_ids"""
    if not ids_to_delete:
        logger.info("Nothing to delete...")
        return []

    with timer(f"deleting {len(ids_to_delete)} stale FABS data", logger.info):
        return delete_stale_fabs(ids_to_delete)
def delete_fabs_transactions(ids_to_delete):
    """ids_to_delete are afa_generated_unique ids"""
    update_award_ids = []
    if ids_to_delete:
        with timer(f"deleting {len(ids_to_delete)} stale FABS data",
                   logger.info):
            update_award_ids = delete_stale_fabs(ids_to_delete)
    else:
        logger.info("Nothing to delete...")

    return update_award_ids
def upsert_fabs_transactions(ids_to_upsert, externally_updated_award_ids):
    """Insert new FABS rows, then refresh award rollups, categories, and linkages."""
    if not (ids_to_upsert or externally_updated_award_ids):
        logger.info("Nothing to insert...")
        return

    update_award_ids = copy(externally_updated_award_ids)

    if ids_to_upsert:
        with timer("inserting new FABS data", logger.info):
            update_award_ids.extend(insert_all_new_fabs(ids_to_upsert))

    if update_award_ids:
        # Convert to tuple and remove duplicates.
        update_award_ids = tuple(set(update_award_ids))
        with timer("updating awards to reflect their latest associated transaction info", logger.info):
            update_awards(update_award_ids)
        with timer("updating award category variables", logger.info):
            update_award_categories(update_award_ids)

    with timer("updating C->D linkages", logger.info):
        update_c_to_d_linkages("assistance")
Exemplo n.º 17
0
    def handle(self, *args, **options):
        """Insert location data for D1/D2 historical transactions."""
        logger.info("Starting historical data load...")

        db_cursor = connections["data_broker"].cursor()
        fy_opt = options.get("fiscal_year")
        page_opt = options.get("page")
        limit_opt = options.get("limit")
        save = options.get("save")

        if fy_opt:
            fiscal_year = fy_opt[0]
            logger.info("Processing data for Fiscal Year " + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page_opt[0] if page_opt else 1
        limit = limit_opt[0] if limit_opt else 500000

        # Both loaders take the same arguments; build them once.
        loader_kwargs = dict(db_cursor=db_cursor, fiscal_year=fiscal_year,
                             page=page, limit=limit, save=save)

        if not options["assistance"]:
            with timer("D1 historical data location insert", logger.info):
                self.update_location_transaction_contract(**loader_kwargs)

        if not options["contracts"]:
            with timer("D2 historical data location insert", logger.info):
                self.update_location_transaction_assistance(**loader_kwargs)

        logger.info("FINISHED")
Exemplo n.º 18
0
    def handle(self, *args, **options):
        """Insert location data for D1/D2 historical transactions."""
        logger.info('Starting historical data load...')

        db_cursor = connections['data_broker'].cursor()
        fiscal_year = options.get('fiscal_year')
        page = options.get('page')
        limit = options.get('limit')
        save = options.get('save')

        if not fiscal_year:
            fiscal_year = 2017
        else:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if not options['assistance']:
            with timer('D1 historical data location insert', logger.info):
                self.update_location_transaction_contract(db_cursor=db_cursor,
                                                          fiscal_year=fiscal_year,
                                                          page=page,
                                                          limit=limit,
                                                          save=save)

        if not options['contracts']:
            with timer('D2 historical data location insert', logger.info):
                self.update_location_transaction_assistance(db_cursor=db_cursor,
                                                            fiscal_year=fiscal_year,
                                                            page=page,
                                                            limit=limit,
                                                            save=save)

        logger.info('FINISHED')
Exemplo n.º 19
0
def delete_fabs_transactions(ids_to_delete, do_not_log_deletions):
    """Delete stale FABS records.

    Args:
        ids_to_delete: afa_generated_unique ids to remove.
        do_not_log_deletions: when falsy, the deleted ids are archived via
            store_deleted_fabs before deletion.

    Returns:
        List of award ids affected by the deletions (empty when nothing to do).
    """
    if ids_to_delete:
        # PEP 8: test truthiness rather than identity against False, so a
        # falsy non-bool value (e.g. None from an unset option) still archives.
        if not do_not_log_deletions:
            store_deleted_fabs(ids_to_delete)
        with timer("deleting stale FABS data", logger.info):
            update_award_ids = delete_stale_fabs(ids_to_delete)

    else:
        update_award_ids = []
        logger.info("Nothing to delete...")

    return update_award_ids
Exemplo n.º 20
0
    def handle(self, *args, **options):
        """
        Updates the column ussgl498100_upward_adjust_pri_deliv_orders_oblig_unpaid_cpe due to the incorrect
        mapping in settings.py

        For every submission needing an update, rows for File B and File C are
        staged into temp tables, applied to the website tables, and the temp
        tables are dropped before moving to the next submission.
        """
        ds_cursor = connection.cursor()
        logger.info("Begin updating file B and C")

        # Pre-compute the column metadata/SQL fragments once for File B...
        broker_cols_b = self.get_list_of_broker_cols(financial_accounts_oc)
        broker_cols_type_b = self.get_list_of_broker_cols_types(
            financial_accounts_oc)
        website_cols_b = self.get_website_row_formatted(financial_accounts_oc)
        website_update_text_b = self.get_cols_to_update(financial_accounts_oc)
        website_cols_joins_b = self.get_file_table_joins(financial_accounts_oc)

        # ...and for File C.
        broker_cols_c = self.get_list_of_broker_cols(financial_accounts_awards)
        broker_cols_type_c = self.get_list_of_broker_cols_types(
            financial_accounts_awards)
        website_cols_c = self.get_website_row_formatted(
            financial_accounts_awards)
        website_update_text_c = self.get_cols_to_update(
            financial_accounts_awards)
        website_cols_joins_c = self.get_file_table_joins(
            financial_accounts_awards)

        with timer("getting submission ids to update", logger.info):
            submissions_to_update = self.get_list_of_submissions()

        for submission in submissions_to_update:
            submission_id = submission[0]

            # File B Updates
            logger.info(
                "loading rows data to update File B submission {}".format(
                    submission_id))

            # Stage File B rows into the file_b_rows_to_update temp table.
            with timer(
                    "retrieving rows to update for File B submission {}".
                    format(submission_id), logger.info):
                get_rows_to_update_query = self.get_rows_to_update(
                    "B", submission_id, broker_cols_b, broker_cols_type_b,
                    website_cols_b)
                ds_cursor.execute(get_rows_to_update_query)

            with timer(
                    "updating rows for File B submission {}".format(
                        submission_id), logger.info):
                update_rows = self.update_website_rows(
                    "financial_accounts_by_program_activity_object_class",
                    "file_b_rows_to_update",
                    website_update_text_b,
                    website_cols_joins_b,
                )

                ds_cursor.execute(update_rows)

            # File C updates
            # Stage File C rows into the file_c_rows_to_update temp table.
            with timer(
                    "retrieving rows to update for File C submission {}".
                    format(submission_id), logger.info):
                get_rows_to_update_query = self.get_rows_to_update(
                    "C", submission_id, broker_cols_c, broker_cols_type_c,
                    website_cols_c)
                ds_cursor.execute(get_rows_to_update_query)

            with timer(
                    "updating rows for File C submission {}".format(
                        submission_id), logger.info):
                update_rows = self.update_website_rows(
                    "financial_accounts_by_awards", "file_c_rows_to_update",
                    website_update_text_c, website_cols_joins_c)
                ds_cursor.execute(update_rows)

            # Drop the per-submission temp tables so the next iteration can recreate them.
            ds_cursor.execute("DROP TABLE file_b_rows_to_update")
            ds_cursor.execute("DROP TABLE file_c_rows_to_update")

        logger.info("Done updating file B and C mappings")