Example #1
    def perform_load(self, ids_to_delete, ids_to_insert):
        if len(ids_to_delete) > 0:
            with timer("deletion of all stale FPDS data", logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info("No FPDS records to delete at this juncture")

        if len(ids_to_insert) > 0:
            # Add FPDS records
            with timer("insertion of new FPDS data in batches", logger.info):
                self.insert_all_new_fpds(ids_to_insert)

            # Update Awards based on changed FPDS records
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update FPDS-specific Awards based on the info in child transactions
            with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
                update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update AwardCategories based on changed FPDS records
            with timer("updating award category variables", logger.info):
                update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

            # Check the linkages from file C to FPDS records and update any that are missing
            with timer("updating C->D linkages", logger.info):
                update_c_to_d_linkages("contract")
        else:
            logger.info("No FPDS records to insert or modify at this juncture")
Example #2
def upsert_fabs_transactions(ids_to_upsert, externally_updated_award_ids):
    if ids_to_upsert or externally_updated_award_ids:
        update_award_ids = copy(externally_updated_award_ids)

        if ids_to_upsert:
            with timer("inserting new FABS data", logger.info):
                update_award_ids.extend(insert_all_new_fabs(ids_to_upsert))

        if update_award_ids:
            update_award_ids = tuple(set(update_award_ids))  # Convert to tuple and remove duplicates.
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                award_record_count = update_awards(update_award_ids)
                logger.info("{} awards updated from their transactional data".format(award_record_count))
            with timer("updating awards with executive compensation data", logger.info):
                award_record_count = update_assistance_awards(update_award_ids)
                logger.info("{} awards updated with FABS-specific and exec comp data".format(award_record_count))

        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("assistance")

    else:
        logger.info("Nothing to insert...")
Example #3
    def handle(self, *args, **options):
        logger.info("==== Starting FPDS nightly data load ====")

        if options.get("date"):
            date = options.get("date")[0]
            date = datetime.strptime(date, "%Y-%m-%d").date()
        else:
            data_load_date_obj = ExternalDataLoadDate.objects.filter(
                external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]
            ).first()
            if not data_load_date_obj:
                date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
            else:
                date = data_load_date_obj.last_load_date
        start_date = datetime.utcnow().strftime("%Y-%m-%d")

        logger.info("Processing data for FPDS starting from %s" % date)

        with timer("retrieval of deleted FPDS IDs", logger.info):
            ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date)

        if len(ids_to_delete) > 0:
            with timer("deletion of all stale FPDS data", logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info("No FPDS records to delete at this juncture")

        with timer("retrieval of new/modified FPDS data ID list", logger.info):
            total_insert = self.get_fpds_transaction_ids(date=date)

        if len(total_insert) > 0:
            # Add FPDS records
            with timer("insertion of new FPDS data in batches", logger.info):
                self.insert_all_new_fpds(total_insert)

            # Update Awards based on changed FPDS records
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update FPDS-specific Awards based on the info in child transactions
            with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
                update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update AwardCategories based on changed FPDS records
            with timer("updating award category variables", logger.info):
                update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

            # Check the linkages from file C to FPDS records and update any that are missing
            with timer("updating C->D linkages", logger.info):
                update_c_to_d_linkages("contract")
        else:
            logger.info("No FPDS records to insert or modify at this juncture")

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]).delete()
        ExternalDataLoadDate(
            last_load_date=start_date, external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]
        ).save()

        logger.info("FPDS NIGHTLY UPDATE COMPLETE")
Example #4
    def handle(self, *args, **options):
        logger.info('Starting FABS nightly data load...')

        # Use date provided or pull most recent ExternalDataLoadDate
        if options.get('date'):
            date = options.get('date')[0]
        else:
            data_load_date_obj = ExternalDataLoadDate.objects.filter(
                external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']
            ).first()
            if not data_load_date_obj:
                date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
            else:
                date = data_load_date_obj.last_load_date
        start_date = datetime.utcnow().strftime('%Y-%m-%d')

        logger.info('Processing data for FABS starting from %s' % date)

        # Retrieve FABS data
        with timer('retrieving/diff-ing FABS Data', logger.info):
            to_insert, ids_to_delete = self.get_fabs_data(date=date)

        total_rows = len(to_insert)
        total_rows_delete = len(ids_to_delete)

        if total_rows_delete > 0:
            # Create a file with the deletion IDs and place in a bucket for ElasticSearch
            self.send_deletes_to_s3(ids_to_delete)

            # Delete FABS records by ID
            with timer('deleting stale FABS data', logger.info):
                self.delete_stale_fabs(ids_to_delete=ids_to_delete)
        else:
            logger.info('Nothing to delete...')

        if total_rows > 0:
            # Add FABS records
            with timer('inserting new FABS data', logger.info):
                self.insert_new_fabs(to_insert=to_insert, total_rows=total_rows)

            # Update Awards based on changed FABS records
            with timer('updating awards to reflect their latest associated transaction info', logger.info):
                update_awards(tuple(award_update_id_list))

            # Update AwardCategories based on changed FABS records
            with timer('updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))

            # Check the linkages from file C to FABS records and update any that are missing
            with timer('updating C->D linkages', logger.info):
                update_c_to_d_linkages('assistance')
        else:
            logger.info('Nothing to insert...')

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).delete()
        ExternalDataLoadDate(last_load_date=start_date,
                             external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).save()

        logger.info('FABS NIGHTLY UPDATE FINISHED!')
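
Unlike the FPDS variant below, this FABS handler passes the CLI date string along unparsed. A minimal sketch of validating it up front, mirroring the FPDS approach (hypothetical helper, not in the source):

from datetime import datetime

def parse_load_date(raw_date):
    # Raises ValueError early if the CLI argument is not YYYY-MM-DD.
    return datetime.strptime(raw_date, "%Y-%m-%d").date()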
Example #5
    def handle(self, *args, **options):
        logger.info('Starting FPDS nightly data load...')

        if options.get('date'):
            date = options.get('date')[0]
            date = datetime.strptime(date, '%Y-%m-%d').date()
        else:
            data_load_date_obj = ExternalDataLoadDate.objects.filter(
                external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']
            ).first()
            if not data_load_date_obj:
                date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
            else:
                date = data_load_date_obj.last_load_date
        start_date = datetime.utcnow().strftime('%Y-%m-%d')

        logger.info('Processing data for FPDS starting from %s' % date)

        with timer('retrieving/diff-ing FPDS Data', logger.info):
            to_insert, ids_to_delete = self.get_fpds_data(date=date)

        total_rows = len(to_insert)
        total_rows_delete = len(ids_to_delete)

        if total_rows_delete > 0:
            with timer('deleting stale FPDS data', logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info('Nothing to delete...')

        if total_rows > 0:
            # Add FPDS records
            with timer('inserting new FPDS data', logger.info):
                self.insert_new_fpds(to_insert=to_insert, total_rows=total_rows)

            # Update Awards based on changed FPDS records
            with timer('updating awards to reflect their latest associated transaction info', logger.info):
                update_awards(tuple(award_update_id_list))

            # Update FPDS-specific Awards based on the info in child transactions
            with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
                update_contract_awards(tuple(award_update_id_list))

            # Update AwardCategories based on changed FPDS records
            with timer('updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))

            # Check the linkages from file C to FPDS records and update any that are missing
            with timer('updating C->D linkages', logger.info):
                update_c_to_d_linkages('contract')
        else:
            logger.info('Nothing to insert...')

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).delete()
        ExternalDataLoadDate(last_load_date=start_date,
                             external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).save()

        logger.info('FPDS NIGHTLY UPDATE FINISHED!')
Example #6
    def handle(self, *args, **options):
        logger.info("==== Starting FPDS nightly data load ====")

        if options.get("date"):
            date = options.get("date")[0]
            date = datetime.strptime(date, "%Y-%m-%d").date()
        else:
            default_last_load_date = datetime.now(timezone.utc) - timedelta(days=1)
            date = get_last_load_date("fpds", default=default_last_load_date).date()
        processing_start_datetime = datetime.now(timezone.utc)

        logger.info("Processing data for FPDS starting from %s" % date)

        with timer("retrieval of deleted FPDS IDs", logger.info):
            ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date)

        if len(ids_to_delete) > 0:
            with timer("deletion of all stale FPDS data", logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info("No FPDS records to delete at this juncture")

        with timer("retrieval of new/modified FPDS data ID list", logger.info):
            total_insert = self.get_fpds_transaction_ids(date=date)

        if len(total_insert) > 0:
            # Add FPDS records
            with timer("insertion of new FPDS data in batches", logger.info):
                self.insert_all_new_fpds(total_insert)

            # Update Awards based on changed FPDS records
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update FPDS-specific Awards based on the info in child transactions
            with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
                update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update AwardCategories based on changed FPDS records
            with timer("updating award category variables", logger.info):
                update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

            # Check the linkages from file C to FPDS records and update any that are missing
            with timer("updating C->D linkages", logger.info):
                update_c_to_d_linkages("contract")
        else:
            logger.info("No FPDS records to insert or modify at this juncture")

        # Update the date for the last time the data load was run
        update_last_load_date("fpds", processing_start_datetime)

        logger.info("FPDS NIGHTLY UPDATE COMPLETE")
Example #7
def update_award_records(awards, skip_cd_linkage=True):
    if awards:
        unique_awards = set(awards)
        logger.info(f"{len(unique_awards)} award records impacted by transaction DML operations")
        logger.info(f"{prune_empty_awards(tuple(unique_awards))} award records removed")
        logger.info(f"{update_awards(tuple(unique_awards))} award records updated")
        logger.info(f"{update_procurement_awards(tuple(unique_awards))} award records updated on FPDS-specific fields")
        if not skip_cd_linkage:
            update_c_to_d_linkages("contract")
    else:
        logger.info("No award records to update")
Example #8
def load_file_c(submission_attributes, db_cursor, certified_award_financial):
    """
    Process and load file C broker data.
    Note: this should run AFTER the D1 and D2 files are loaded because we try to join to those records to retrieve some
    additional information about the awarding sub-tier agency.
    """
    # this matches the file b reverse directive, but it is repeated here to ensure that we don't overwrite it as we
    # change up the order of file loading

    if certified_award_financial.count == 0:
        logger.warning("No File C (award financial) data found, skipping...")
        return

    reverse = re.compile(r"(_(cpe|fyb)$)|^transaction_obligated_amount$")

    # dictionary to capture TAS that were skipped and some metadata
    # tas = top-level key
    # count = number of rows skipped
    # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted
    skipped_tas = {}
    total_rows = certified_award_financial.count
    start_time = datetime.now()

    bulk_treasury_appropriation_account_tas_lookup(certified_award_financial.tas_ids, db_cursor)

    _save_file_c_rows(certified_award_financial, total_rows, start_time, skipped_tas, submission_attributes, reverse)

    update_c_to_d_linkages("contract", False, submission_attributes.submission_id)
    update_c_to_d_linkages("assistance", False, submission_attributes.submission_id)

    for key in skipped_tas:
        logger.info(f"Skipped {skipped_tas[key]['count']:,} rows due to missing TAS: {key}")

    total_tas_skipped = 0
    for key in skipped_tas:
        total_tas_skipped += skipped_tas[key]["count"]

    logger.info(f"Skipped a total of {total_tas_skipped:,} TAS rows for File C")
Example #9
def upsert_fabs_transactions(ids_to_upsert, externally_updated_award_ids):
    if ids_to_upsert or externally_updated_award_ids:
        update_award_ids = copy(externally_updated_award_ids)

        if ids_to_upsert:
            with timer("inserting new FABS data", logger.info):
                update_award_ids.extend(insert_all_new_fabs(ids_to_upsert))

        if update_award_ids:
            update_award_ids = tuple(set(update_award_ids))  # Convert to tuple and remove duplicates.
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(update_award_ids)
            with timer("updating award category variables", logger.info):
                update_award_categories(update_award_ids)

        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("assistance")

    else:
        logger.info("Nothing to insert...")
Example #10
def load_file_c(submission_attributes, db_cursor, certified_award_financial):
    """
    Process and load file C broker data.
    Note: this should run AFTER the D1 and D2 files are loaded because we try to join to those records to retrieve some
    additional information about the awarding sub-tier agency.
    """

    if certified_award_financial.count == 0:
        logger.warning("No File C (award financial) data found, skipping...")
        return

    # this matches the file b reverse directive, but it is repeated here to ensure that we don't overwrite it as we
    # change up the order of file loading
    reverse = re.compile(r"(_(cpe|fyb)$)|^transaction_obligated_amount$")
    skipped_tas = defaultdict(int)  # tracks count of rows skipped due to "missing" TAS
    total_rows = certified_award_financial.count
    start_time = datetime.now()

    bulk_treasury_appropriation_account_tas_lookup(certified_award_financial.tas_ids, db_cursor)

    _save_file_c_rows(certified_award_financial, total_rows, start_time, skipped_tas, submission_attributes, reverse)

    update_c_to_d_linkages("contract", False, submission_attributes.submission_id)
    update_c_to_d_linkages("assistance", False, submission_attributes.submission_id)

    for tas, count in skipped_tas.items():
        logger.info(f"Skipped {count:,} rows due to {tas}")

    total_tas_skipped = sum(skipped_tas.values())

    if total_tas_skipped > 0:
        logger.info(f"SKIPPED {total_tas_skipped:,} ROWS of File C (missing TAS)")
    else:
        logger.info("All File C records in Broker loaded into USAspending")
Example #11
def upsert_fabs_transactions(ids_to_upsert, externally_updated_award_ids):
    if ids_to_upsert or externally_updated_award_ids:
        update_award_ids = copy(externally_updated_award_ids)

        if ids_to_upsert:
            with timer("inserting new FABS data", logger.info):
                update_award_ids.extend(insert_all_new_fabs(ids_to_upsert))

        if update_award_ids:
            update_award_ids = tuple(set(update_award_ids))  # Convert to tuple and remove duplicates.
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(update_award_ids)
            with timer("updating award category variables", logger.info):
                update_award_categories(update_award_ids)

        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("assistance")

    else:
        logger.info("Nothing to insert...")
Example #12
    def handle(self, *args, **options):
        logger.info("Starting FABS data load script...")
        start_date = datetime.now(timezone.utc).strftime('%Y-%m-%d')

        fabs_load_db_id = lookups.EXTERNAL_DATA_TYPE_DICT['fabs']
        data_load_date_obj = ExternalDataLoadDate.objects.filter(external_data_type_id=fabs_load_db_id).first()

        if options.get("date"):  # if provided, use cli data
            load_from_date = options.get("date")[0]
        elif data_load_date_obj:  # else if last run is in DB, use that
            load_from_date = data_load_date_obj.last_load_date
        else:  # Default is yesterday at midnight
            load_from_date = (datetime.now(timezone.utc) - timedelta(days=1)).strftime('%Y-%m-%d')

        logger.info('Processing data for FABS starting from %s' % load_from_date)

        with timer('retrieving/diff-ing FABS Data', logger.info):
            upsert_transactions = self.get_fabs_transaction_ids(date=load_from_date)

        with timer("obtaining delete records", logger.info):
            ids_to_delete = self.get_fabs_records_to_delete(
                date=load_from_date)

        if ids_to_delete:
            self.store_deleted_fabs(ids_to_delete)

            # Delete FABS records by ID
            with timer("deleting stale FABS data", logger.info):
                self.delete_stale_fabs(ids_to_delete=ids_to_delete)
                del ids_to_delete
        else:
            logger.info("Nothing to delete...")

        if upsert_transactions:
            # Add FABS records
            with timer('inserting new FABS data', logger.info):
                self.insert_all_new_fabs(all_new_to_insert=upsert_transactions)

            # Update Awards based on changed FABS records
            with timer('updating awards to reflect their latest associated transaction info', logger.info):
                update_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update AwardCategories based on changed FABS records
            with timer('updating award category variables', logger.info):
                update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

            # Check the linkages from file C to FABS records and update any that are missing
            with timer('updating C->D linkages', logger.info):
                update_c_to_d_linkages('assistance')
        else:
            logger.info('Nothing to insert...')

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).delete()
        ExternalDataLoadDate(last_load_date=start_date,
                             external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).save()

        logger.info('FABS UPDATE FINISHED!')
Example #13
    def handle(self, *args, **options):

        with transaction.atomic():
            for link_type in self.LINKAGE_TYPES:
                update_c_to_d_linkages(type=link_type)
Example #15
    def run_sql(self, submission=None):
        for link_type in self.LINKAGE_TYPES:
            update_c_to_d_linkages(type=link_type, submission_id=submission)
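
Both of the last snippets assume a LINKAGE_TYPES attribute on the command class. Based on the link types passed to update_c_to_d_linkages throughout the other examples, a plausible value would be (hypothetical, not verified against the source):

    LINKAGE_TYPES = ["contract", "assistance"]  # assumed from the "contract"/"assistance" calls above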