def handle(self, *args, **options):
    logger.info('Starting historical data load...')

    db_cursor = connections['data_broker'].cursor()
    fiscal_year = options.get('fiscal_year')
    page = options.get('page')
    limit = options.get('limit')
    save = options.get('save')

    if fiscal_year:
        fiscal_year = fiscal_year[0]
        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
    else:
        fiscal_year = 2017

    page = page[0] if page else 1
    limit = limit[0] if limit else 500000

    if not options['assistance']:
        with timer('D1 historical data location insert', logger.info):
            self.update_location_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year,
                                                      page=page, limit=limit, save=save)

    if not options['contracts']:
        with timer('D2 historical data location insert', logger.info):
            self.update_location_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year,
                                                        page=page, limit=limit, save=save)

    logger.info('FINISHED')
def handle(self, *args, **options):
    logger.info('Starting updating awarding agencies...')

    fiscal_year = options.get('fiscal_year')[0]
    page = options.get('page')
    limit = options.get('limit')
    page = page[0] if page else 1
    limit = limit[0] if limit else 500000

    if options.get('contracts', None):
        with timer('D1 (contracts/FPDS) awarding/funding agencies updates', logger.info):
            self.update_awarding_funding_agency(fiscal_year, 'D1', page=page, limit=limit)
    elif options.get('assistance', None):
        with timer('D2 (assistance/FABS) awarding/funding agencies updates', logger.info):
            self.update_awarding_funding_agency(fiscal_year, 'D2', page=page, limit=limit)
    else:
        logger.error('Not a valid data type: --assistance,--contracts')

    logger.info('Finished')
def handle(self, *args, **options): logger.info("==== Starting FPDS nightly data load ====") if options.get("date"): date = options.get("date")[0] date = datetime.strptime(date, "%Y-%m-%d").date() else: data_load_date_obj = ExternalDataLoadDate.objects.filter( external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"] ).first() if not data_load_date_obj: date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d") else: date = data_load_date_obj.last_load_date start_date = datetime.utcnow().strftime("%Y-%m-%d") logger.info("Processing data for FPDS starting from %s" % date) with timer("retrieval of deleted FPDS IDs", logger.info): ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date) if len(ids_to_delete) > 0: with timer("deletion of all stale FPDS data", logger.info): self.delete_stale_fpds(ids_to_delete=ids_to_delete) else: logger.info("No FPDS records to delete at this juncture") with timer("retrieval of new/modified FPDS data ID list", logger.info): total_insert = self.get_fpds_transaction_ids(date=date) if len(total_insert) > 0: # Add FPDS records with timer("insertion of new FPDS data in batches", logger.info): self.insert_all_new_fpds(total_insert) # Update Awards based on changed FPDS records with timer("updating awards to reflect their latest associated transaction info", logger.info): update_awards(tuple(AWARD_UPDATE_ID_LIST)) # Update FPDS-specific Awards based on the info in child transactions with timer("updating contract-specific awards to reflect their latest transaction info", logger.info): update_contract_awards(tuple(AWARD_UPDATE_ID_LIST)) # Update AwardCategories based on changed FPDS records with timer("updating award category variables", logger.info): update_award_categories(tuple(AWARD_UPDATE_ID_LIST)) # Check the linkages from file C to FPDS records and update any that are missing with timer("updating C->D linkages", logger.info): update_c_to_d_linkages("contract") else: logger.info("No FPDS records to insert or modify at this juncture") # Update the date for the last time the data load was run ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]).delete() ExternalDataLoadDate( last_load_date=start_date, external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"] ).save() logger.info("FPDS NIGHTLY UPDATE COMPLETE")
def test_timer(capsys):
    """Verify that timer helper executes without error."""
    with timer():
        print('Doing a thing')
    output = capsys.readouterr()[0]
    assert 'Beginning' in output
    assert 'finished' in output
def test_timer_times(capsys):
    """Verify that timer shows longer times for slower operations."""
    pattern = re.compile(r'([\d\.e\-]+) sec')

    with timer():
        print('Doing a thing')
    output0 = capsys.readouterr()[0]
    time0 = float(pattern.search(output0).group(1))

    with timer():
        print('Doing a slower thing')
        time.sleep(0.1)
    output1 = capsys.readouterr()[0]
    time1 = float(pattern.search(output1).group(1))

    assert time1 > time0
def test_timer_times(capsys):
    """Verify that timer shows longer times for slower operations."""
    pattern = re.compile(r'([\d\.e\-]+)s')

    with timer():
        print('Doing a thing')
    output0 = capsys.readouterr()[0]
    time0 = float(pattern.search(output0).group(1))

    with timer():
        print('Doing a slower thing')
        time.sleep(0.1)
    output1 = capsys.readouterr()[0]
    time1 = float(pattern.search(output1).group(1))

    assert time1 > time0
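# The tests above pin down the observable behavior of `timer` without showing its
# body: it announces the block with 'Beginning', reports 'finished' with the elapsed
# seconds, and (per the management commands below) accepts an optional message plus a
# logging function such as `logger.info`. What follows is a minimal sketch consistent
# with those constraints, not the project's actual implementation; the parameter names
# and exact message wording are assumptions, and the elapsed-time format matches the
# older '<n> sec' test variant rather than the later '<n>s' one.
from contextlib import contextmanager
import time


@contextmanager
def timer(message='', logging_function=print):
    # Announce the start of the timed block.
    logging_function('Beginning {}...'.format(message))
    start = time.perf_counter()
    try:
        yield
    finally:
        # Report the elapsed wall-clock time, e.g. "... finished thing in 0.10 sec".
        elapsed = time.perf_counter() - start
        logging_function('... finished {} in {:.2f} sec'.format(message, elapsed))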
def handle(self, *args, **options):
    logger.info('Starting historical data load...')

    db_cursor = connections['data_broker'].cursor()
    fiscal_year = options.get('fiscal_year')
    page = options.get('page')
    limit = options.get('limit')

    if fiscal_year:
        fiscal_year = fiscal_year[0]
        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
    else:
        fiscal_year = 2017

    page = page[0] if page else 1
    limit = limit[0] if limit else 500000

    if not options['assistance']:
        with timer('D1 historical data load', logger.info):
            self.update_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

    if not options['contracts']:
        with timer('D2 historical data load', logger.info):
            self.update_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        update_awards(tuple(award_update_id_list))

    with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
        update_contract_awards(tuple(award_contract_update_id_list))

    with timer('updating award category variables', logger.info):
        update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
def handle(self, *args, **options):
    processing_start_datetime = datetime.now(timezone.utc)

    logger.info("Starting FABS data load script...")

    do_not_log_deletions = options["do_not_log_deletions"]

    # "Reload all" supersedes all other processing options.
    reload_all = options["reload_all"]
    if reload_all:
        submission_ids = None
        afa_ids = None
        start_datetime = None
        end_datetime = None
    else:
        submission_ids = tuple(options["submission_ids"]) if options["submission_ids"] else None
        afa_ids = read_afa_ids_from_file(options["afa_id_file"]) if options["afa_id_file"] else None
        start_datetime = options["start_datetime"]
        end_datetime = options["end_datetime"]

    # If no other processing options were provided, then this is an incremental load.
    is_incremental_load = not any((reload_all, submission_ids, afa_ids, start_datetime, end_datetime))

    if is_incremental_load:
        last_load_date = get_last_load_date()
        submission_ids = get_new_submission_ids(last_load_date)
        logger.info("Processing data for FABS starting from %s" % last_load_date)

    if is_incremental_load and not submission_ids:
        logger.info("No new submissions. Exiting.")
    else:
        with timer("obtaining delete records", logger.info):
            ids_to_delete = get_fabs_records_to_delete(submission_ids, afa_ids, start_datetime, end_datetime)

        with timer("retrieving/diff-ing FABS Data", logger.info):
            ids_to_upsert = get_fabs_transaction_ids(submission_ids, afa_ids, start_datetime, end_datetime)

        update_award_ids = delete_fabs_transactions(ids_to_delete, do_not_log_deletions)
        upsert_fabs_transactions(ids_to_upsert, update_award_ids)

        if is_incremental_load:
            update_last_load_date("fabs", processing_start_datetime)

    logger.info("FABS UPDATE FINISHED!")
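# `read_afa_ids_from_file` is only visible as a call site above. A hypothetical sketch
# of such a reader, assuming a plain-text file with one afa_generated_unique ID per
# line (the helper name comes from the call above; the file format is a guess):
def read_afa_ids_from_file(afa_id_file_path):
    with open(afa_id_file_path) as f:
        # Skip blank lines and strip surrounding whitespace.
        return tuple(line.strip() for line in f if line.strip())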
def handle(self, *args, **options):
    logger.info('Starting FPDS nightly data load...')

    if options.get('date'):
        date = options.get('date')[0]
        date = datetime.strptime(date, '%Y-%m-%d').date()
    else:
        data_load_date_obj = ExternalDataLoadDate.objects.filter(
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']
        ).first()
        if not data_load_date_obj:
            date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
        else:
            date = data_load_date_obj.last_load_date
    start_date = datetime.utcnow().strftime('%Y-%m-%d')

    logger.info('Processing data for FPDS starting from %s' % date)

    with timer('retrieving/diff-ing FPDS Data', logger.info):
        to_insert, ids_to_delete = self.get_fpds_data(date=date)

    total_rows = len(to_insert)
    total_rows_delete = len(ids_to_delete)

    if total_rows_delete > 0:
        with timer('deleting stale FPDS data', logger.info):
            self.delete_stale_fpds(ids_to_delete=ids_to_delete)
    else:
        logger.info('Nothing to delete...')

    if total_rows > 0:
        # Add FPDS records
        with timer('inserting new FPDS data', logger.info):
            self.insert_new_fpds(to_insert=to_insert, total_rows=total_rows)

        # Update Awards based on changed FPDS records
        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        # Update FPDS-specific Awards based on the info in child transactions
        with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
            update_contract_awards(tuple(award_update_id_list))

        # Update AwardCategories based on changed FPDS records
        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))

        # Check the linkages from file C to FPDS records and update any that are missing
        with timer('updating C->D linkages', logger.info):
            update_c_to_d_linkages('contract')
    else:
        logger.info('Nothing to insert...')

    # Update the date for the last time the data load was run
    ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).delete()
    ExternalDataLoadDate(last_load_date=start_date,
                         external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).save()

    logger.info('FPDS NIGHTLY UPDATE FINISHED!')
def handle(self, *args, **options): logger.info("==== Starting FPDS nightly data load ====") if options.get("date"): date = options.get("date")[0] date = datetime.strptime(date, "%Y-%m-%d").date() else: default_last_load_date = datetime.now(timezone.utc) - timedelta(days=1) date = get_last_load_date("fpds", default=default_last_load_date).date() processing_start_datetime = datetime.now(timezone.utc) logger.info("Processing data for FPDS starting from %s" % date) with timer("retrieval of deleted FPDS IDs", logger.info): ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date) if len(ids_to_delete) > 0: with timer("deletion of all stale FPDS data", logger.info): self.delete_stale_fpds(ids_to_delete=ids_to_delete) else: logger.info("No FPDS records to delete at this juncture") with timer("retrieval of new/modified FPDS data ID list", logger.info): total_insert = self.get_fpds_transaction_ids(date=date) if len(total_insert) > 0: # Add FPDS records with timer("insertion of new FPDS data in batches", logger.info): self.insert_all_new_fpds(total_insert) # Update Awards based on changed FPDS records with timer("updating awards to reflect their latest associated transaction info", logger.info): update_awards(tuple(AWARD_UPDATE_ID_LIST)) # Update FPDS-specific Awards based on the info in child transactions with timer("updating contract-specific awards to reflect their latest transaction info", logger.info): update_contract_awards(tuple(AWARD_UPDATE_ID_LIST)) # Update AwardCategories based on changed FPDS records with timer("updating award category variables", logger.info): update_award_categories(tuple(AWARD_UPDATE_ID_LIST)) # Check the linkages from file C to FPDS records and update any that are missing with timer("updating C->D linkages", logger.info): update_c_to_d_linkages("contract") else: logger.info("No FPDS records to insert or modify at this juncture") # Update the date for the last time the data load was run update_last_load_date("fpds", processing_start_datetime) logger.info("FPDS NIGHTLY UPDATE COMPLETE")
def upsert_fabs_transactions(ids_to_upsert, externally_updated_award_ids):
    if ids_to_upsert or externally_updated_award_ids:
        update_award_ids = copy(externally_updated_award_ids)
        if ids_to_upsert:
            with timer("inserting new FABS data", logger.info):
                update_award_ids.extend(insert_all_new_fabs(ids_to_upsert))

        if update_award_ids:
            update_award_ids = tuple(set(update_award_ids))  # Convert to tuple and remove duplicates.
            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(update_award_ids)

            with timer("updating award category variables", logger.info):
                update_award_categories(update_award_ids)

        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("assistance")
    else:
        logger.info("Nothing to insert...")
def handle(self, *args, **options):
    logger.info('Starting FABS nightly data load...')

    # Use date provided or pull most recent ExternalDataLoadDate
    if options.get('date'):
        date = options.get('date')[0]
    else:
        data_load_date_obj = ExternalDataLoadDate.objects.filter(
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']
        ).first()
        if not data_load_date_obj:
            date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
        else:
            date = data_load_date_obj.last_load_date
    start_date = datetime.utcnow().strftime('%Y-%m-%d')

    logger.info('Processing data for FABS starting from %s' % date)

    # Retrieve FABS data
    with timer('retrieving/diff-ing FABS Data', logger.info):
        to_insert, ids_to_delete = self.get_fabs_data(date=date)

    total_rows = len(to_insert)
    total_rows_delete = len(ids_to_delete)

    if total_rows_delete > 0:
        # Create a file with the deletion IDs and place it in a bucket for Elasticsearch
        self.send_deletes_to_s3(ids_to_delete)

        # Delete FABS records by ID
        with timer('deleting stale FABS data', logger.info):
            self.delete_stale_fabs(ids_to_delete=ids_to_delete)
    else:
        logger.info('Nothing to delete...')

    if total_rows > 0:
        # Add FABS records
        with timer('inserting new FABS data', logger.info):
            self.insert_new_fabs(to_insert=to_insert, total_rows=total_rows)

        # Update Awards based on changed FABS records
        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        # Update AwardCategories based on changed FABS records
        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))

        # Check the linkages from file C to FABS records and update any that are missing
        with timer('updating C->D linkages', logger.info):
            update_c_to_d_linkages('assistance')
    else:
        logger.info('Nothing to insert...')

    # Update the date for the last time the data load was run
    ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).delete()
    ExternalDataLoadDate(last_load_date=start_date,
                         external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).save()

    logger.info('FABS NIGHTLY UPDATE FINISHED!')
def handle(self, *args, **options):
    logger.info('Starting row deletion...')

    if options['batches']:
        limit = options['batches'] * options['batchsize']
    else:
        limit = None

    with timer('executing query', logger.info):
        cursor = self.fabs_cursor(limit)

    batch_no = 1
    while (not options['batches']) or (batch_no <= options['batches']):
        message = 'Batch {} of {} rows'.format(batch_no, options['batchsize'])
        with timer(message, logger.info):  # was `logging.info`; use the module-level logger like the rest of the command
            rows = cursor.fetchmany(options['batchsize'])
            if not rows:
                logger.info('No further rows; finished')
                return
            delete_ids = [r[0] for r in rows]
            with timer('deleting rows', logger.info):
                store_deleted_fabs(delete_ids)
                delete_stale_fabs(delete_ids)
        batch_no += 1

    logger.info('{} batches finished, complete'.format(batch_no - 1))
def delete_fabs_transactions(ids_to_delete, do_not_log_deletions):
    """ids_to_delete are afa_generated_unique ids"""
    if ids_to_delete:
        if do_not_log_deletions is False:
            store_deleted_fabs(ids_to_delete)
        with timer("deleting stale FABS data", logger.info):
            update_award_ids = delete_stale_fabs(ids_to_delete)
    else:
        update_award_ids = []
        logger.info("Nothing to delete...")

    return update_award_ids
def handle(self, *args, **options):
    logger.info('Starting updates to award data...')

    all_records_flag = options.get('all')
    fiscal_year = options.get('fiscal_year')

    award_update_id_list = []
    award_contract_update_id_list = []

    if not all_records_flag:
        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        # Lists to store for update_awards and update_contract_awards
        award_update_id_list = TransactionNormalized.objects.filter(
            action_date__fy=fiscal_year
        ).values_list('award_id', flat=True)
        award_contract_update_id_list = TransactionFPDS.objects.filter(
            action_date__fy=fiscal_year
        ).values_list('transaction__award_id', flat=True)

    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        if all_records_flag:
            update_awards()
        else:
            update_awards(tuple(award_update_id_list))

    with timer('updating contract-specific awards to reflect their latest transaction info...', logger.info):
        if all_records_flag:
            update_contract_awards()
        else:
            update_contract_awards(tuple(award_contract_update_id_list))

    with timer('updating award category variables', logger.info):
        if all_records_flag:
            update_award_categories()
        else:
            update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
def handle(self, *args, **options):
    logger.info('Starting row deletion...')

    if options['batches']:
        limit = options['batches'] * options['batchsize']
    else:
        limit = None

    with timer('executing query', logger.info):
        cursor = self.fabs_cursor(limit)

    batch_no = 1
    while (not options['batches']) or (batch_no <= options['batches']):
        message = 'Batch {} of {} rows'.format(batch_no, options['batchsize'])
        with timer(message, logger.info):  # was `logging.info`; use the module-level logger for consistency
            rows = cursor.fetchmany(options['batchsize'])
            if not rows:
                logger.info('No further rows; finished')
                return
            ids = [r[0] for r in rows]
            with timer('deleting rows', logger.info):
                TransactionNormalized.objects.filter(
                    assistance_data__afa_generated_unique__in=ids
                ).delete()
        batch_no += 1

    logger.info('{} batches finished, complete'.format(batch_no - 1))
def handle(self, *args, **options):
    logger.info('Starting FPDS bulk data load...')

    db_cursor = connections['data_broker'].cursor()
    ds_cursor = connection.cursor()
    fiscal_year = options.get('fiscal_year')
    if fiscal_year:
        fiscal_year = fiscal_year[0]
    else:
        fiscal_year = 2017
    logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

    with timer('Diff-ing FPDS data', logger.info):
        to_insert, to_delete = self.diff_fpds_data(db_cursor=db_cursor, ds_cursor=ds_cursor,
                                                   fiscal_year=fiscal_year)

    total_rows = len(to_insert)
    total_rows_delete = len(to_delete)

    if total_rows_delete > 0:
        with timer('Deleting stale FPDS data', logger.info):
            self.delete_stale_fpds(to_delete=to_delete)

    if total_rows > 0:
        # Set lookups after deletions to only get latest
        self.set_lookup_maps()

        with timer('Get Broker FPDS data', logger.info):
            fpds_broker_data = self.get_fpds_data(db_cursor=db_cursor, fiscal_year=fiscal_year, to_insert=to_insert)

        with timer('Loading POP Location data', logger.info):
            self.load_locations(fpds_broker_data=fpds_broker_data, total_rows=total_rows, pop_flag=True)

        with timer('Loading LE Location data', logger.info):
            self.load_locations(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

        with timer('Loading Legal Entity data', logger.info):
            self.load_legal_entity(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

        with timer('Loading Parent Award data', logger.info):
            self.load_parent_awards(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

        with timer('Loading Award data', logger.info):
            self.load_awards(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

        with timer('Loading Transaction Normalized data', logger.info):
            self.load_transaction_normalized(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

        with timer('Loading Transaction FPDS data', logger.info):
            self.load_transaction_fpds(fpds_broker_data=fpds_broker_data, total_rows=total_rows)

        award_update_id_list = [award.id for award in award_lookup]

        with timer('Updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        with timer('Updating contract-specific awards to reflect their latest transaction info', logger.info):
            update_contract_awards(tuple(award_update_id_list))

        with timer('Updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))
    else:
        logger.info('Nothing to insert...FINISHED!')
def handle(self, *args, **options): logger.info("Starting FABS data load script...") start_date = datetime.now(timezone.utc).strftime('%Y-%m-%d') fabs_load_db_id = lookups.EXTERNAL_DATA_TYPE_DICT['fabs'] data_load_date_obj = ExternalDataLoadDate.objects.filter( external_data_type_id=fabs_load_db_id).first() if options.get("date"): # if provided, use cli data load_from_date = options.get("date")[0] elif data_load_date_obj: # else if last run is in DB, use that load_from_date = data_load_date_obj.last_load_date else: # Default is yesterday at midnight load_from_date = (datetime.now(timezone.utc) - timedelta(days=1)).strftime('%Y-%m-%d') logger.info('Processing data for FABS starting from %s' % load_from_date) with timer('retrieving/diff-ing FABS Data', logger.info): upsert_transactions = self.get_fabs_transaction_ids( date=load_from_date) with timer("obtaining delete records", logger.info): ids_to_delete = self.get_fabs_records_to_delete( date=load_from_date) if ids_to_delete: self.store_deleted_fabs(ids_to_delete) # Delete FABS records by ID with timer("deleting stale FABS data", logger.info): self.delete_stale_fabs(ids_to_delete=ids_to_delete) del ids_to_delete else: logger.info("Nothing to delete...") if upsert_transactions: # Add FABS records with timer('inserting new FABS data', logger.info): self.insert_all_new_fabs(all_new_to_insert=upsert_transactions) # Update Awards based on changed FABS records with timer( 'updating awards to reflect their latest associated transaction info', logger.info): update_awards(tuple(AWARD_UPDATE_ID_LIST)) # Update AwardCategories based on changed FABS records with timer('updating award category variables', logger.info): update_award_categories(tuple(AWARD_UPDATE_ID_LIST)) # Check the linkages from file C to FABS records and update any that are missing with timer('updating C->D linkages', logger.info): update_c_to_d_linkages('assistance') else: logger.info('Nothing to insert...') # Update the date for the last time the data load was run ExternalDataLoadDate.objects.filter( external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs'] ).delete() ExternalDataLoadDate(last_load_date=start_date, external_data_type_id=lookups. EXTERNAL_DATA_TYPE_DICT['fabs']).save() logger.info('FABS UPDATE FINISHED!')
def handle(self, *args, **options):
    logger.info('Starting FABS bulk data load...')

    db_cursor = connections['data_broker'].cursor()
    ds_cursor = connection.cursor()
    fiscal_year = options.get('fiscal_year')
    if fiscal_year:
        fiscal_year = fiscal_year[0]
    else:
        fiscal_year = 2017
    logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

    with timer('Diff-ing FABS data', logger.info):
        to_insert, to_delete = self.diff_fabs_data(db_cursor=db_cursor, ds_cursor=ds_cursor,
                                                   fiscal_year=fiscal_year)

    total_rows = len(to_insert)
    total_rows_delete = len(to_delete)

    if total_rows_delete > 0:
        with timer('Deleting stale FABS data', logger.info):
            self.delete_stale_fabs(to_delete=to_delete)

    if total_rows > 0:
        # Set lookups after deletions to only get latest
        self.set_lookup_maps()

        with timer('Get Broker FABS data', logger.info):
            fabs_broker_data = self.get_fabs_data(db_cursor=db_cursor, fiscal_year=fiscal_year, to_insert=to_insert)

        with timer('Loading POP Location data...', logger.info):
            self.load_locations(fabs_broker_data=fabs_broker_data, total_rows=total_rows, pop_flag=True)

        with timer('Loading LE Location data', logger.info):
            self.load_locations(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Legal Entity data', logger.info):
            self.load_legal_entity(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Award data', logger.info):
            self.load_awards(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Transaction Normalized data', logger.info):
            self.load_transaction_normalized(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Transaction FABS data', logger.info):
            self.load_transaction_fabs(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        award_update_id_list = [award.id for award in award_lookup]

        with timer('Updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        with timer('Updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))
    else:
        logger.info('Nothing to insert...FINISHED!')