    def handle(self, *args, **options):
        logger.info('Starting historical data load...')

        db_cursor = connections['data_broker'].cursor()
        fiscal_year = options.get('fiscal_year')
        page = options.get('page')
        limit = options.get('limit')
        save = options.get('save')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if not options['assistance']:
            with timer('D1 historical data location insert', logger.info):
                self.update_location_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page,
                                                          limit=limit, save=save)

        if not options['contracts']:
            with timer('D2 historical data location insert', logger.info):
                self.update_location_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page,
                                                            limit=limit, save=save)

        logger.info('FINISHED')
    def handle(self, *args, **options):
        logger.info('Starting updating awarding agencies...')

        fiscal_year = options.get('fiscal_year')[0]

        page = options.get('page')
        limit = options.get('limit')

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if options.get('contracts', None):
            with timer('D1 (contracts/FPDS) awarding/funding agencies updates',
                       logger.info):
                self.update_awarding_funding_agency(fiscal_year,
                                                    'D1',
                                                    page=page,
                                                    limit=limit)

        elif options.get('assistance', None):
            with timer(
                    'D2 (assistance/FABS) awarding/funding agencies updates',
                    logger.info):
                self.update_awarding_funding_agency(fiscal_year,
                                                    'D2',
                                                    page=page,
                                                    limit=limit)

        else:
            logger.error('Not a valid data type: --assistance, --contracts')

        logger.info('Finished')
    def handle(self, *args, **options):
        logger.info("==== Starting FPDS nightly data load ====")

        if options.get("date"):
            date = options.get("date")[0]
            date = datetime.strptime(date, "%Y-%m-%d").date()
        else:
            data_load_date_obj = ExternalDataLoadDate.objects.filter(
                external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]
            ).first()
            if not data_load_date_obj:
                date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
            else:
                date = data_load_date_obj.last_load_date
        start_date = datetime.utcnow().strftime("%Y-%m-%d")

        logger.info("Processing data for FPDS starting from %s" % date)

        with timer("retrieval of deleted FPDS IDs", logger.info):
            ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date)

        if len(ids_to_delete) > 0:
            with timer("deletion of all stale FPDS data", logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info("No FPDS records to delete at this juncture")

        with timer("retrieval of new/modified FPDS data ID list", logger.info):
            total_insert = self.get_fpds_transaction_ids(date=date)

        if len(total_insert) > 0:
            # Add FPDS records
            with timer("insertion of new FPDS data in batches", logger.info):
                self.insert_all_new_fpds(total_insert)

            # Update Awards based on changed FPDS records
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update FPDS-specific Awards based on the info in child transactions
            with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
                update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update AwardCategories based on changed FPDS records
            with timer("updating award category variables", logger.info):
                update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

            # Check the linkages from file C to FPDS records and update any that are missing
            with timer("updating C->D linkages", logger.info):
                update_c_to_d_linkages("contract")
        else:
            logger.info("No FPDS records to insert or modify at this juncture")

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]).delete()
        ExternalDataLoadDate(
            last_load_date=start_date, external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]
        ).save()

        logger.info("FPDS NIGHTLY UPDATE COMPLETE")
Example #4
def test_timer(capsys):
    'Verify that timer helper executes without error'

    with timer():
        print('Doing a thing')
    output = capsys.readouterr()[0]
    assert 'Beginning' in output
    assert 'finished' in output
Example #5
def test_timer_times(capsys):
    'Verify that timer shows longer times for slower operations'

    pattern = re.compile(r'([\d\.e\-]+) sec')

    with timer():
        print('Doing a thing')
    output0 = capsys.readouterr()[0]
    time0 = float(pattern.search(output0).group(1))

    with timer():
        print('Doing a slower thing')
        time.sleep(0.1)
    output1 = capsys.readouterr()[0]
    time1 = float(pattern.search(output1).group(1))

    assert time1 > time0
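
All of the snippets on this page lean on the same timer context manager. The two tests above pin down its observable behavior: with no arguments it prints a 'Beginning' line on entry and a 'finished ... <seconds> sec' line on exit, and the handle() examples show it also accepts a label plus a logging callable such as logger.info. Below is a minimal sketch consistent with that behavior; the parameter names and exact message wording are assumptions, not the project's actual helper.

import time
from contextlib import contextmanager


@contextmanager
def timer(message='', report=print):
    # Sketch only: the signature and message format are inferred from the
    # tests above ('Beginning', 'finished', '<float> sec') and from the
    # handle() examples that pass a label and logger.info.
    report('Beginning {}...'.format(message))
    start = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - start
        report('... {} finished in {} sec'.format(message, elapsed))
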
def test_timer_times(capsys):
    'Verify that timer shows longer times for slower operations'

    pattern = re.compile(r'([\d\.e\-]+)s')

    with timer():
        print('Doing a thing')
    output0 = capsys.readouterr()[0]
    time0 = float(pattern.search(output0).group(1))

    with timer():
        print('Doing a slower thing')
        time.sleep(0.1)
    output1 = capsys.readouterr()[0]
    time1 = float(pattern.search(output1).group(1))

    assert time1 > time0
    def handle(self, *args, **options):
        logger.info('Starting historical data load...')

        db_cursor = connections['data_broker'].cursor()
        fiscal_year = options.get('fiscal_year')
        page = options.get('page')
        limit = options.get('limit')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if not options['assistance']:
            with timer('D1 historical data load', logger.info):
                self.update_transaction_contract(db_cursor=db_cursor,
                                                 fiscal_year=fiscal_year,
                                                 page=page,
                                                 limit=limit)

        if not options['contracts']:
            with timer('D2 historical data load', logger.info):
                self.update_transaction_assistance(db_cursor=db_cursor,
                                                   fiscal_year=fiscal_year,
                                                   page=page,
                                                   limit=limit)

        with timer(
                'updating awards to reflect their latest associated transaction info',
                logger.info):
            update_awards(tuple(award_update_id_list))

        with timer(
                'updating contract-specific awards to reflect their latest transaction info',
                logger.info):
            update_contract_awards(tuple(award_contract_update_id_list))

        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))

        # Done!
        logger.info('FINISHED')
Example #9
    def handle(self, *args, **options):
        processing_start_datetime = datetime.now(timezone.utc)

        logger.info("Starting FABS data load script...")

        do_not_log_deletions = options["do_not_log_deletions"]

        # "Reload all" supersedes all other processing options.
        reload_all = options["reload_all"]
        if reload_all:
            submission_ids = None
            afa_ids = None
            start_datetime = None
            end_datetime = None
        else:
            submission_ids = tuple(options["submission_ids"]) if options["submission_ids"] else None
            afa_ids = read_afa_ids_from_file(options["afa_id_file"]) if options["afa_id_file"] else None
            start_datetime = options["start_datetime"]
            end_datetime = options["end_datetime"]

        # If no other processing options were provided, then this is an incremental load.
        is_incremental_load = not any((reload_all, submission_ids, afa_ids, start_datetime, end_datetime))

        if is_incremental_load:
            last_load_date = get_last_load_date()
            submission_ids = get_new_submission_ids(last_load_date)
            logger.info("Processing data for FABS starting from %s" % last_load_date)

        if is_incremental_load and not submission_ids:
            logger.info("No new submissions. Exiting.")

        else:
            with timer("obtaining delete records", logger.info):
                ids_to_delete = get_fabs_records_to_delete(submission_ids, afa_ids, start_datetime, end_datetime)

            with timer("retrieving/diff-ing FABS Data", logger.info):
                ids_to_upsert = get_fabs_transaction_ids(submission_ids, afa_ids, start_datetime, end_datetime)

            update_award_ids = delete_fabs_transactions(ids_to_delete, do_not_log_deletions)
            upsert_fabs_transactions(ids_to_upsert, update_award_ids)

        if is_incremental_load:
            update_last_load_date("fabs", processing_start_datetime)

        logger.info("FABS UPDATE FINISHED!")
Example #10
    def handle(self, *args, **options):
        logger.info('Starting FPDS nightly data load...')

        if options.get('date'):
            date = options.get('date')[0]
            date = datetime.strptime(date, '%Y-%m-%d').date()
        else:
            data_load_date_obj = ExternalDataLoadDate.objects. \
                filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).first()
            if not data_load_date_obj:
                date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
            else:
                date = data_load_date_obj.last_load_date
        start_date = datetime.utcnow().strftime('%Y-%m-%d')

        logger.info('Processing data for FPDS starting from %s' % date)

        with timer('retrieving/diff-ing FPDS Data', logger.info):
            to_insert, ids_to_delete = self.get_fpds_data(date=date)

        total_rows = len(to_insert)
        total_rows_delete = len(ids_to_delete)

        if total_rows_delete > 0:
            with timer('deleting stale FPDS data', logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info('Nothing to delete...')

        if total_rows > 0:
            # Add FPDS records
            with timer('inserting new FPDS data', logger.info):
                self.insert_new_fpds(to_insert=to_insert, total_rows=total_rows)

            # Update Awards based on changed FPDS records
            with timer('updating awards to reflect their latest associated transaction info', logger.info):
                update_awards(tuple(award_update_id_list))

            # Update FPDS-specific Awards based on the info in child transactions
            with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
                update_contract_awards(tuple(award_update_id_list))

            # Update AwardCategories based on changed FPDS records
            with timer('updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))

            # Check the linkages from file C to FPDS records and update any that are missing
            with timer('updating C->D linkages', logger.info):
                update_c_to_d_linkages('contract')
        else:
            logger.info('Nothing to insert...')

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).delete()
        ExternalDataLoadDate(last_load_date=start_date,
                             external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).save()

        logger.info('FPDS NIGHTLY UPDATE FINISHED!')
    def handle(self, *args, **options):
        logger.info("==== Starting FPDS nightly data load ====")

        if options.get("date"):
            date = options.get("date")[0]
            date = datetime.strptime(date, "%Y-%m-%d").date()
        else:
            default_last_load_date = datetime.now(timezone.utc) - timedelta(days=1)
            date = get_last_load_date("fpds", default=default_last_load_date).date()
        processing_start_datetime = datetime.now(timezone.utc)

        logger.info("Processing data for FPDS starting from %s" % date)

        with timer("retrieval of deleted FPDS IDs", logger.info):
            ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date)

        if len(ids_to_delete) > 0:
            with timer("deletion of all stale FPDS data", logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info("No FPDS records to delete at this juncture")

        with timer("retrieval of new/modified FPDS data ID list", logger.info):
            total_insert = self.get_fpds_transaction_ids(date=date)

        if len(total_insert) > 0:
            # Add FPDS records
            with timer("insertion of new FPDS data in batches", logger.info):
                self.insert_all_new_fpds(total_insert)

            # Update Awards based on changed FPDS records
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update FPDS-specific Awards based on the info in child transactions
            with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
                update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update AwardCategories based on changed FPDS records
            with timer("updating award category variables", logger.info):
                update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

            # Check the linkages from file C to FPDS records and update any that are missing
            with timer("updating C->D linkages", logger.info):
                update_c_to_d_linkages("contract")
        else:
            logger.info("No FPDS records to insert or modify at this juncture")

        # Update the date for the last time the data load was run
        update_last_load_date("fpds", processing_start_datetime)

        logger.info("FPDS NIGHTLY UPDATE COMPLETE")
def upsert_fabs_transactions(ids_to_upsert, externally_updated_award_ids):
    if ids_to_upsert or externally_updated_award_ids:
        update_award_ids = copy(externally_updated_award_ids)

        if ids_to_upsert:
            with timer("inserting new FABS data", logger.info):
                update_award_ids.extend(insert_all_new_fabs(ids_to_upsert))

        if update_award_ids:
            update_award_ids = tuple(set(update_award_ids))  # Convert to tuple and remove duplicates.
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(update_award_ids)
            with timer("updating award category variables", logger.info):
                update_award_categories(update_award_ids)

        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("assistance")

    else:
        logger.info("Nothing to insert...")
Example #13
    def handle(self, *args, **options):
        logger.info('Starting FABS nightly data load...')

        # Use date provided or pull most recent ExternalDataLoadDate
        if options.get('date'):
            date = options.get('date')[0]
        else:
            data_load_date_obj = ExternalDataLoadDate.objects. \
                filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).first()
            if not data_load_date_obj:
                date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
            else:
                date = data_load_date_obj.last_load_date
        start_date = datetime.utcnow().strftime('%Y-%m-%d')

        logger.info('Processing data for FABS starting from %s' % date)

        # Retrieve FABS data
        with timer('retrieving/diff-ing FABS Data', logger.info):
            to_insert, ids_to_delete = self.get_fabs_data(date=date)

        total_rows = len(to_insert)
        total_rows_delete = len(ids_to_delete)

        if total_rows_delete > 0:
            # Create a file with the deletion IDs and place in a bucket for ElasticSearch
            self.send_deletes_to_s3(ids_to_delete)

            # Delete FABS records by ID
            with timer('deleting stale FABS data', logger.info):
                self.delete_stale_fabs(ids_to_delete=ids_to_delete)
        else:
            logger.info('Nothing to delete...')

        if total_rows > 0:
            # Add FABS records
            with timer('inserting new FABS data', logger.info):
                self.insert_new_fabs(to_insert=to_insert, total_rows=total_rows)

            # Update Awards based on changed FABS records
            with timer('updating awards to reflect their latest associated transaction info', logger.info):
                update_awards(tuple(award_update_id_list))

            # Update AwardCategories based on changed FABS records
            with timer('updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))

            # Check the linkages from file C to FABS records and update any that are missing
            with timer('updating C->D linkages', logger.info):
                update_c_to_d_linkages('assistance')
        else:
            logger.info('Nothing to insert...')

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).delete()
        ExternalDataLoadDate(last_load_date=start_date,
                             external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).save()

        logger.info('FABS NIGHTLY UPDATE FINISHED!')
    def handle(self, *args, **options):
        logger.info('Starting row deletion...')

        if options['batches']:
            limit = options['batches'] * options['batchsize']
        else:
            limit = None
        with timer('executing query', logger.info):
            cursor = self.fabs_cursor(limit)
        batch_no = 1
        while (not options['batches']) or (batch_no <= options['batches']):
            message = 'Batch {} of {} rows'.format(batch_no, options['batchsize'])
            with timer(message, logger.info):
                rows = cursor.fetchmany(options['batchsize'])
            if not rows:
                logger.info('No further rows; finished')
                return
            delete_ids = [r[0] for r in rows]
            with timer('deleting rows', logger.info):
                store_deleted_fabs(delete_ids)
                delete_stale_fabs(delete_ids)
            batch_no += 1
        logger.info('{} batches finished, complete'.format(batch_no - 1))
    def handle(self, *args, **options):
        logger.info('Starting updating awarding agencies...')

        fiscal_year = options.get('fiscal_year')[0]

        page = options.get('page')
        limit = options.get('limit')

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if options.get('contracts', None):
            with timer('D1 (contracts/FPDS) awarding/funding agencies updates', logger.info):
                self.update_awarding_funding_agency(fiscal_year, 'D1', page=page, limit=limit)

        elif options.get('assistance', None):
            with timer('D2 (assistance/FABS) awarding/funding agencies updates', logger.info):
                self.update_awarding_funding_agency(fiscal_year, 'D2', page=page, limit=limit)

        else:
            logger.error('Not a valid data type: --assistance, --contracts')

        logger.info('Finished')
def upsert_fabs_transactions(ids_to_upsert, externally_updated_award_ids):
    if ids_to_upsert or externally_updated_award_ids:
        update_award_ids = copy(externally_updated_award_ids)

        if ids_to_upsert:
            with timer("inserting new FABS data", logger.info):
                update_award_ids.extend(insert_all_new_fabs(ids_to_upsert))

        if update_award_ids:
            update_award_ids = tuple(set(
                update_award_ids))  # Convert to tuple and remove duplicates.
            with timer(
                    "updating awards to reflect their latest associated transaction info",
                    logger.info):
                update_awards(update_award_ids)
            with timer("updating award category variables", logger.info):
                update_award_categories(update_award_ids)

        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("assistance")

    else:
        logger.info("Nothing to insert...")
    def handle(self, *args, **options):
        logger.info('Starting historical data load...')

        db_cursor = connections['data_broker'].cursor()
        fiscal_year = options.get('fiscal_year')
        page = options.get('page')
        limit = options.get('limit')
        save = options.get('save')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if not options['assistance']:
            with timer('D1 historical data location insert', logger.info):
                self.update_location_transaction_contract(
                    db_cursor=db_cursor,
                    fiscal_year=fiscal_year,
                    page=page,
                    limit=limit,
                    save=save)

        if not options['contracts']:
            with timer('D2 historical data location insert', logger.info):
                self.update_location_transaction_assistance(
                    db_cursor=db_cursor,
                    fiscal_year=fiscal_year,
                    page=page,
                    limit=limit,
                    save=save)

        logger.info('FINISHED')
Example #18
def delete_fabs_transactions(ids_to_delete, do_not_log_deletions):
    """
    ids_to_delete are afa_generated_unique ids
    """
    if ids_to_delete:
        if do_not_log_deletions is False:
            store_deleted_fabs(ids_to_delete)
        with timer("deleting stale FABS data", logger.info):
            update_award_ids = delete_stale_fabs(ids_to_delete)

    else:
        update_award_ids = []
        logger.info("Nothing to delete...")

    return update_award_ids
    def handle(self, *args, **options):
        logger.info('Starting historical data load...')

        db_cursor = connections['data_broker'].cursor()
        fiscal_year = options.get('fiscal_year')
        page = options.get('page')
        limit = options.get('limit')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if not options['assistance']:
            with timer('D1 historical data load', logger.info):
                self.update_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

        if not options['contracts']:
            with timer('D2 historical data load', logger.info):
                self.update_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
            update_contract_awards(tuple(award_contract_update_id_list))

        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))

        # Done!
        logger.info('FINISHED')
Example #20
    def handle(self, *args, **options):
        logger.info('Starting updates to award data...')

        all_records_flag = options.get('all')
        fiscal_year = options.get('fiscal_year')

        award_update_id_list = []
        award_contract_update_id_list = []

        if not all_records_flag:
            if fiscal_year:
                fiscal_year = fiscal_year[0]
                logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
            else:
                fiscal_year = 2017

            # Lists to store for update_awards and update_contract_awards
            award_update_id_list = TransactionNormalized.objects.filter(action_date__fy=fiscal_year).\
                values_list('award_id', flat=True)
            award_contract_update_id_list = TransactionFPDS.objects.filter(action_date__fy=fiscal_year).\
                values_list('transaction__award_id', flat=True)

        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards() if all_records_flag else update_awards(tuple(award_update_id_list))

        with timer('updating contract-specific awards to reflect their latest transaction info...', logger.info):
            if all_records_flag:
                update_contract_awards()
            else:
                update_contract_awards(tuple(award_contract_update_id_list))

        with timer('updating award category variables', logger.info):
            update_award_categories() if all_records_flag else update_award_categories(tuple(award_update_id_list))

        # Done!
        logger.info('FINISHED')
Example #21
    def handle(self, *args, **options):
        logger.info('Starting row deletion...')

        if options['batches']:
            limit = options['batches'] * options['batchsize']
        else:
            limit = None
        with timer('executing query', logger.info):
            cursor = self.fabs_cursor(limit)
        batch_no = 1
        while ((not options['batches']) or (batch_no <= options['batches'])):
            message = 'Batch {} of {} rows'.format(batch_no,
                                                   options['batchsize'])
            with timer(message, logger.info):
                rows = cursor.fetchmany(options['batchsize'])
            if not rows:
                logger.info('No further rows; finished')
                return
            ids = [r[0] for r in rows]
            with timer('deleting rows', logger.info):
                TransactionNormalized.objects.\
                    filter(assistance_data__afa_generated_unique__in=ids).delete()
            batch_no += 1
        logger.info('{} batches finished, complete'.format(batch_no - 1))
    def handle(self, *args, **options):
        logger.info('Starting FPDS bulk data load...')

        db_cursor = connections['data_broker'].cursor()
        ds_cursor = connection.cursor()
        fiscal_year = options.get('fiscal_year')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
        else:
            fiscal_year = 2017

        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

        with timer('Diff-ing FPDS data', logger.info):
            to_insert, to_delete = self.diff_fpds_data(db_cursor=db_cursor,
                                                       ds_cursor=ds_cursor,
                                                       fiscal_year=fiscal_year)

        total_rows = len(to_insert)
        total_rows_delete = len(to_delete)

        if total_rows_delete > 0:
            with timer('Deleting stale FPDS data', logger.info):
                self.delete_stale_fpds(to_delete=to_delete)

        if total_rows > 0:
            # Set lookups after deletions to only get latest
            self.set_lookup_maps()

            with timer('Get Broker FPDS data', logger.info):
                fpds_broker_data = self.get_fpds_data(
                    db_cursor=db_cursor, fiscal_year=fiscal_year, to_insert=to_insert)

            with timer('Loading POP Location data', logger.info):
                self.load_locations(fpds_broker_data=fpds_broker_data, total_rows=total_rows, pop_flag=True)

            with timer('Loading LE Location data', logger.info):
                self.load_locations(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            with timer('Loading Legal Entity data', logger.info):
                self.load_legal_entity(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            with timer('Loading Parent Award data', logger.info):
                self.load_parent_awards(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            with timer('Loading Award data', logger.info):
                self.load_awards(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            with timer('Loading Transaction Normalized data', logger.info):
                self.load_transaction_normalized(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            with timer('Loading Transaction FPDS data', logger.info):
                self.load_transaction_fpds(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

            # award_lookup is not defined in this snippet; it appears to be
            # module-level state populated by the load_* helpers above.
            award_update_id_list = [award.id for award in award_lookup]

            with timer('Updating awards to reflect their latest associated transaction info', logger.info):
                update_awards(tuple(award_update_id_list))

            with timer('Updating contract-specific awards to reflect their latest transaction info', logger.info):
                update_contract_awards(tuple(award_update_id_list))

            with timer('Updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))
        else:
            logger.info('Nothing to insert...FINISHED!')
Example #25
    def handle(self, *args, **options):
        logger.info("Starting FABS data load script...")
        start_date = datetime.now(timezone.utc).strftime('%Y-%m-%d')

        fabs_load_db_id = lookups.EXTERNAL_DATA_TYPE_DICT['fabs']
        data_load_date_obj = ExternalDataLoadDate.objects.filter(
            external_data_type_id=fabs_load_db_id).first()

        if options.get("date"):  # if provided, use cli data
            load_from_date = options.get("date")[0]
        elif data_load_date_obj:  # else if last run is in DB, use that
            load_from_date = data_load_date_obj.last_load_date
        else:  # Default is yesterday at midnight
            load_from_date = (datetime.now(timezone.utc) -
                              timedelta(days=1)).strftime('%Y-%m-%d')

        logger.info('Processing data for FABS starting from %s' %
                    load_from_date)

        with timer('retrieving/diff-ing FABS Data', logger.info):
            upsert_transactions = self.get_fabs_transaction_ids(
                date=load_from_date)

        with timer("obtaining delete records", logger.info):
            ids_to_delete = self.get_fabs_records_to_delete(
                date=load_from_date)

        if ids_to_delete:
            self.store_deleted_fabs(ids_to_delete)

            # Delete FABS records by ID
            with timer("deleting stale FABS data", logger.info):
                self.delete_stale_fabs(ids_to_delete=ids_to_delete)
                del ids_to_delete
        else:
            logger.info("Nothing to delete...")

        if upsert_transactions:
            # Add FABS records
            with timer('inserting new FABS data', logger.info):
                self.insert_all_new_fabs(all_new_to_insert=upsert_transactions)

            # Update Awards based on changed FABS records
            with timer(
                    'updating awards to reflect their latest associated transaction info',
                    logger.info):
                update_awards(tuple(AWARD_UPDATE_ID_LIST))

            # Update AwardCategories based on changed FABS records
            with timer('updating award category variables', logger.info):
                update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

            # Check the linkages from file C to FABS records and update any that are missing
            with timer('updating C->D linkages', logger.info):
                update_c_to_d_linkages('assistance')
        else:
            logger.info('Nothing to insert...')

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']
        ).delete()
        ExternalDataLoadDate(
            last_load_date=start_date,
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']
        ).save()

        logger.info('FABS UPDATE FINISHED!')
Example #26
    def handle(self, *args, **options):
        logger.info('Starting FABS bulk data load...')

        db_cursor = connections['data_broker'].cursor()
        ds_cursor = connection.cursor()
        fiscal_year = options.get('fiscal_year')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
        else:
            fiscal_year = 2017

        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

        with timer('Diff-ing FABS data', logger.info):
            to_insert, to_delete = self.diff_fabs_data(db_cursor=db_cursor,
                                                       ds_cursor=ds_cursor,
                                                       fiscal_year=fiscal_year)

        total_rows = len(to_insert)
        total_rows_delete = len(to_delete)

        if total_rows_delete > 0:
            with timer('Deleting stale FABS data', logger.info):
                self.delete_stale_fabs(to_delete=to_delete)

        if total_rows > 0:
            # Set lookups after deletions to only get latest
            self.set_lookup_maps()

            with timer('Get Broker FABS data', logger.info):
                fabs_broker_data = self.get_fabs_data(db_cursor=db_cursor,
                                                      fiscal_year=fiscal_year,
                                                      to_insert=to_insert)

            with timer('Loading POP Location data...', logger.info):
                self.load_locations(fabs_broker_data=fabs_broker_data,
                                    total_rows=total_rows,
                                    pop_flag=True)

            with timer('Loading LE Location data', logger.info):
                self.load_locations(fabs_broker_data=fabs_broker_data,
                                    total_rows=total_rows)

            with timer('Loading Legal Entity data', logger.info):
                self.load_legal_entity(fabs_broker_data=fabs_broker_data,
                                       total_rows=total_rows)

            with timer('Loading Award data', logger.info):
                self.load_awards(fabs_broker_data=fabs_broker_data,
                                 total_rows=total_rows)

            with timer('Loading Transaction Normalized data', logger.info):
                self.load_transaction_normalized(
                    fabs_broker_data=fabs_broker_data, total_rows=total_rows)

            with timer('Loading Transaction FABS data', logger.info):
                self.load_transaction_fabs(fabs_broker_data, total_rows)

            # award_lookup is not defined in this snippet; it appears to be
            # module-level state populated by the load_* helpers above.
            award_update_id_list = [award.id for award in award_lookup]

            with timer(
                    'Updating awards to reflect their latest associated transaction info',
                    logger.info):
                update_awards(tuple(award_update_id_list))

            with timer('Updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))
        else:
            logger.info('Nothing to insert...FINISHED!')