def handle(self, *args, **options): logger.info("Creating broker cursor") broker_cursor = connections["data_broker"].cursor() logger.info("Running MONTH_SCHEDULE_SQL") broker_cursor.execute(MONTH_SCHEDULE_SQL) logger.info("Getting month schedule values from cursor") month_schedule_values = dictfetchall(broker_cursor) logger.info("Running QUARTER_SCHEDULE_SQL") broker_cursor.execute(QUARTER_SCHEDULE_SQL) logger.info("Getting quarter schedule values from cursor") quarter_schedule_values = dictfetchall(broker_cursor) logger.info("Deleting existing DABS Submission Window Schedule") DABSSubmissionWindowSchedule.objects.all().delete() logger.info("Inserting DABS Submission Window Schedule into website") submission_schedule_objs = [ DABSSubmissionWindowSchedule(**values) for values in month_schedule_values ] submission_schedule_objs += [ DABSSubmissionWindowSchedule(**values) for values in quarter_schedule_values ] DABSSubmissionWindowSchedule.objects.bulk_create( submission_schedule_objs) logger.info( "DABS Submission Window Schedule loader finished successfully!")
def diff_fpds_data(self, db_cursor, ds_cursor, fiscal_year=None):
    db_query = 'SELECT detached_award_procurement_id FROM detached_award_procurement'
    db_arguments = []

    ds_query = 'SELECT detached_award_procurement_id FROM transaction_fpds'
    ds_arguments = []

    if fiscal_year:
        if db_arguments:
            db_query += ' AND'
        else:
            db_query += ' WHERE'

        if ds_arguments:
            ds_query += ' AND'
        else:
            ds_query += ' WHERE'

        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)

        db_query += ' action_date::Date BETWEEN %s AND %s'
        db_arguments += [fy_begin, fy_end]

        ds_query += ' action_date::Date BETWEEN %s AND %s'
        ds_arguments += [fy_begin, fy_end]

    db_cursor.execute(db_query, db_arguments)
    ds_cursor.execute(ds_query, ds_arguments)

    db_dict = dictfetchall(db_cursor)
    ds_dict = dictfetchall(ds_cursor)

    db_set = set(map(lambda db_entry: int(db_entry['detached_award_procurement_id']), db_dict))
    ds_set = set(map(lambda ds_entry: int(ds_entry['detached_award_procurement_id']), ds_dict))

    to_insert = db_set - ds_set
    to_delete = ds_set - db_set

    logger.info('Number of records to insert: %s' % str(len(to_insert)))
    logger.info('Number of records to delete: %s' % str(len(to_delete)))

    # Return what is not currently in our database (to insert) and what we have that Broker does not (to delete)
    return to_insert, to_delete
def get_treasury_appropriation_account_tas_lookup(tas_lookup_id, db_cursor):
    """Get the matching TAS object from the broker database and save it to our running list."""
    if tas_lookup_id in TAS_ID_TO_ACCOUNT:
        return TAS_ID_TO_ACCOUNT[tas_lookup_id]

    # Checks the broker DB tas_lookup table for the tas_id and returns the matching TAS object in the datastore
    db_cursor.execute(
        "SELECT * FROM tas_lookup WHERE (financial_indicator2 <> 'F' OR financial_indicator2 IS NULL) "
        "AND account_num = %s", [tas_lookup_id])
    tas_data = dictfetchall(db_cursor)

    if tas_data is None or len(tas_data) == 0:
        return None, 'Account number {} not found in Broker'.format(tas_lookup_id)

    tas_rendering_label = TreasuryAppropriationAccount.generate_tas_rendering_label(
        ata=tas_data[0]["allocation_transfer_agency"],
        aid=tas_data[0]["agency_identifier"],
        typecode=tas_data[0]["availability_type_code"],
        bpoa=tas_data[0]["beginning_period_of_availa"],
        epoa=tas_data[0]["ending_period_of_availabil"],
        mac=tas_data[0]["main_account_code"],
        sub=tas_data[0]["sub_account_code"]
    )

    TAS_ID_TO_ACCOUNT[tas_lookup_id] = (
        TreasuryAppropriationAccount.objects.filter(tas_rendering_label=tas_rendering_label).first(),
        tas_rendering_label)
    return TAS_ID_TO_ACCOUNT[tas_lookup_id]
def get_treasury_appropriation_account_tas_lookup(tas_lookup_id, db_cursor):
    """Get the matching TAS object from the broker database and save it to our running list."""
    if tas_lookup_id in TAS_ID_TO_ACCOUNT:
        return TAS_ID_TO_ACCOUNT[tas_lookup_id]

    # Checks the broker DB tas_lookup table for the tas_id and returns the matching TAS object in the datastore
    db_cursor.execute(
        "SELECT * FROM tas_lookup WHERE (financial_indicator2 <> 'F' OR financial_indicator2 IS NULL) "
        "AND account_num = %s",
        [tas_lookup_id],
    )
    tas_data = dictfetchall(db_cursor)

    if tas_data is None or len(tas_data) == 0:
        return None, "Account number {} not found in Broker".format(tas_lookup_id)

    tas_rendering_label = TreasuryAppropriationAccount.generate_tas_rendering_label(
        ata=tas_data[0]["allocation_transfer_agency"],
        aid=tas_data[0]["agency_identifier"],
        typecode=tas_data[0]["availability_type_code"],
        bpoa=tas_data[0]["beginning_period_of_availa"],
        epoa=tas_data[0]["ending_period_of_availabil"],
        mac=tas_data[0]["main_account_code"],
        sub=tas_data[0]["sub_account_code"],
    )

    TAS_ID_TO_ACCOUNT[tas_lookup_id] = (
        TreasuryAppropriationAccount.objects.filter(tas_rendering_label=tas_rendering_label).first(),
        tas_rendering_label,
    )
    return TAS_ID_TO_ACCOUNT[tas_lookup_id]
def get_award_data(db_cursor, award_type, max_id, internal_ids=None):
    """
    Gets data for all new awards from broker with ID greater than the ones already stored
    for the given award type
    """
    query_columns = ['internal_id']

    # we need different columns depending on if it's a procurement or a grant
    if award_type == 'procurement':
        query_columns.extend([
            'contract_number',
            'idv_reference_number',
            'contracting_office_aid',
            'contract_agency_code',
            'contract_idv_agency_code',
        ])
    else:
        # TODO contracting_office_aid equivalent? Do we even need it?
        query_columns.extend(['fain'])

    if isinstance(internal_ids, list) and len(internal_ids) > 0:
        ids_string = ','.join([str(id).lower() for id in internal_ids])
        query = "SELECT {} FROM fsrs_{} WHERE internal_id = ANY(\'{{{}}}\'::text[]) ORDER BY id".format(
            ",".join(query_columns), award_type, ids_string)
    else:
        query = "SELECT {} FROM fsrs_{} WHERE id > {} ORDER BY id".format(
            ",".join(query_columns), award_type, str(max_id))

    db_cursor.execute(query)
    return dictfetchall(db_cursor)
def get_treasury_appropriation_account_tas_lookup(tas_lookup_id, db_cursor):
    """Get the matching TAS object from the broker database and save it to our running list."""
    if tas_lookup_id in TAS_ID_TO_ACCOUNT:
        return TAS_ID_TO_ACCOUNT[tas_lookup_id]

    # Checks the broker DB tas_lookup table for the tas_id and returns the matching TAS object in the datastore
    db_cursor.execute('SELECT * FROM tas_lookup WHERE account_num = %s', [tas_lookup_id])
    tas_data = dictfetchall(db_cursor)

    # These `or ""` convert from None to a blank string, which is how the TAS table stores nulls
    q_kwargs = {
        "allocation_transfer_agency_id": tas_data[0]["allocation_transfer_agency"] or "",
        "agency_id": tas_data[0]["agency_identifier"] or "",
        "beginning_period_of_availability": tas_data[0]["beginning_period_of_availa"] or "",
        "ending_period_of_availability": tas_data[0]["ending_period_of_availabil"] or "",
        "availability_type_code": tas_data[0]["availability_type_code"] or "",
        "main_account_code": tas_data[0]["main_account_code"] or "",
        "sub_account_code": tas_data[0]["sub_account_code"] or ""
    }

    TAS_ID_TO_ACCOUNT[tas_lookup_id] = TreasuryAppropriationAccount.objects.filter(Q(**q_kwargs)).first()
    return TAS_ID_TO_ACCOUNT[tas_lookup_id]
def get_fabs_data(date):
    db_cursor = connections['data_broker'].cursor()

    # The ORDER BY is important here because deletions must happen in a specific order and that order is defined
    # by the Broker's PK since every modification is a new row
    db_query = 'SELECT * ' \
               'FROM published_award_financial_assistance ' \
               'WHERE created_at >= %s ' \
               'AND (is_active IS True OR UPPER(correction_delete_indicatr) = \'D\')'
    db_args = [date]

    db_cursor.execute(db_query, db_args)
    db_rows = dictfetchall(db_cursor)  # this returns an OrderedDict

    ids_to_delete = []
    final_db_rows = []

    # Iterate through the result dict and determine what needs to be deleted and what needs to be added
    for row in db_rows:
        if row['correction_delete_indicatr'] and row['correction_delete_indicatr'].upper() == 'D':
            ids_to_delete.append(row['afa_generated_unique'].upper())
        else:
            final_db_rows.append(row)

    logger.info('Number of records to insert/update: %s' % str(len(final_db_rows)))
    logger.info('Number of records to delete: %s' % str(len(ids_to_delete)))

    return final_db_rows, ids_to_delete
def get_broker_submission(self):
    self.db_cursor.execute(
        f"""
        select
            s.submission_id,
            (
                select  max(updated_at)
                from    publish_history
                where   submission_id = s.submission_id
            )::timestamptz as published_date,
            (
                select  max(updated_at)
                from    certify_history
                where   submission_id = s.submission_id
            )::timestamptz as certified_date,
            coalesce(s.cgac_code, s.frec_code) as toptier_code,
            s.reporting_start_date,
            s.reporting_end_date,
            s.reporting_fiscal_year,
            s.reporting_fiscal_period,
            s.is_quarter_format,
            s.d2_submission,
            s.publish_status_id
        from
            submission as s
        where
            s.submission_id = %s
        """,
        [self.submission_id],
    )

    return dictfetchall(self.db_cursor)
def gather_new_duns(self, db_cursor, update_date, latest_broker_duns_id):
    new_duns_query = "SELECT * FROM duns " \
                     "WHERE updated_at > \'" + str(update_date) + "\' AND " \
                     "duns_id > " + str(latest_broker_duns_id)
    logger.info('Gathering duns created since last update')
    db_cursor.execute(new_duns_query)
    new_duns = dictfetchall(db_cursor)

    update_duns_query = "SELECT * FROM duns " \
                        "WHERE updated_at > \'" + str(update_date) + "\' AND " \
                        "duns_id <= " + str(latest_broker_duns_id)
    logger.info('Gathering duns updated since last update')
    db_cursor.execute(update_duns_query)
    update_duns = dictfetchall(db_cursor)

    return new_duns, update_duns
def get_fpds_data(self, db_cursor, fiscal_year=None, to_insert=None):
    query = 'SELECT * FROM detached_award_procurement'
    arguments = []

    if to_insert:
        if arguments:
            query += ' AND'
        else:
            query += ' WHERE'
        query += ' detached_award_procurement_id IN %s'
        arguments += [tuple(to_insert)]

    if fiscal_year:
        if arguments:
            query += ' AND'
        else:
            query += ' WHERE'
        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)
        query += ' action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin, fy_end]

    query += ' ORDER BY detached_award_procurement_id'

    logger.info("Executing select query on Broker DB")
    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    return dictfetchall(db_cursor)
def tas_ids(self):
    sql = f"""
        select distinct c.tas_id
        {self.get_from_where(self.submission_attributes.submission_id)}
        and c.tas_id is not null
    """
    self.db_cursor.execute(sql)
    return dictfetchall(self.db_cursor)
def diff_fpds_data(self, db_cursor, ds_cursor, fiscal_year=None):
    db_query = 'SELECT detached_award_procurement_id FROM detached_award_procurement'
    db_arguments = []

    ds_query = 'SELECT detached_award_procurement_id FROM transaction_fpds'
    ds_arguments = []

    if fiscal_year:
        if db_arguments:
            db_query += ' AND'
        else:
            db_query += ' WHERE'

        if ds_arguments:
            ds_query += ' AND'
        else:
            ds_query += ' WHERE'

        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)

        db_query += ' action_date::Date BETWEEN %s AND %s'
        db_arguments += [fy_begin, fy_end]

        ds_query += ' action_date::Date BETWEEN %s AND %s'
        ds_arguments += [fy_begin, fy_end]

    db_cursor.execute(db_query, db_arguments)
    ds_cursor.execute(ds_query, ds_arguments)

    db_dict = dictfetchall(db_cursor)
    ds_dict = dictfetchall(ds_cursor)

    db_set = set(map(lambda db_entry: int(db_entry['detached_award_procurement_id']), db_dict))
    ds_set = set(map(lambda ds_entry: int(ds_entry['detached_award_procurement_id']), ds_dict))

    to_insert = db_set - ds_set
    to_delete = ds_set - db_set

    logger.info('Number of records to insert: %s' % str(len(to_insert)))
    logger.info('Number of records to delete: %s' % str(len(to_delete)))

    # Return what is not currently in our database (to insert) and what we have that Broker does not (to delete)
    return to_insert, to_delete
def load_executive_compensation(db_cursor, date, start_date):
    logger.info("Getting DUNS/Exec Comp data from broker based on the last pull date of %s..." % str(date))

    # Get first page
    db_cursor.execute(EXEC_COMP_QUERY, [date])
    exec_comp_query_dict = dictfetchall(db_cursor)

    total_rows = len(exec_comp_query_dict)
    logger.info('Updating Executive Compensation Data, {} rows coming from the Broker...'.format(total_rows))

    start_time = datetime.now(timezone.utc)

    for index, row in enumerate(exec_comp_query_dict, 1):
        if not (index % 100):
            # start_time is timezone-aware, so the elapsed time must be computed against an aware "now" as well
            logger.info('Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now(timezone.utc) - start_time))

        leo_update_dict = {
            "officer_1_name": row['high_comp_officer1_full_na'],
            "officer_1_amount": row['high_comp_officer1_amount'],
            "officer_2_name": row['high_comp_officer2_full_na'],
            "officer_2_amount": row['high_comp_officer2_amount'],
            "officer_3_name": row['high_comp_officer3_full_na'],
            "officer_3_amount": row['high_comp_officer3_amount'],
            "officer_4_name": row['high_comp_officer4_full_na'],
            "officer_4_amount": row['high_comp_officer4_amount'],
            "officer_5_name": row['high_comp_officer5_full_na'],
            "officer_5_amount": row['high_comp_officer5_amount'],
        }

        any_data = False
        for attr, value in leo_update_dict.items():
            if value and value != "":
                any_data = True
                break

        if not any_data:
            continue

        duns_number = row['awardee_or_recipient_uniqu']

        # Deal with multiples that we have in our LE table
        legal_entities = LegalEntity.objects.filter(recipient_unique_id=duns_number)
        if not legal_entities.exists():
            logger.info('No record in data store for DUNS {}. Skipping...'.format(duns_number))

        for le in legal_entities:
            leo, _ = LegalEntityOfficers.objects.get_or_create(legal_entity=le)
            for attr, value in leo_update_dict.items():
                if value == "":
                    value = None
                setattr(leo, attr, value)
            leo.save()

    # Update the date for the last time the data load was run
    update_last_load_date("exec_comp", start_date)
def get_broker_submission(self):
    self.db_cursor.execute(
        f"""
        with publish_certify_history as (
            select
                distinct_pairings.submission_id,
                jsonb_agg(
                    jsonb_build_object(
                        'published_date', ph.updated_at::timestamptz,
                        'certified_date', ch.updated_at::timestamptz
                    )
                ) AS history
            from
                (
                    select distinct
                        submission_id,
                        publish_history_id,
                        certify_history_id
                    from
                        published_files_history
                    where
                        submission_id = %s
                ) as distinct_pairings
            left outer join publish_history as ph using (publish_history_id)
            left outer join certify_history as ch using (certify_history_id)
            group by distinct_pairings.submission_id
        )
        select
            s.submission_id,
            (
                select  max(updated_at)
                from    publish_history
                where   submission_id = s.submission_id
            )::timestamptz as published_date,
            (
                select  max(updated_at)
                from    certify_history
                where   submission_id = s.submission_id
            )::timestamptz as certified_date,
            coalesce(s.cgac_code, s.frec_code) as toptier_code,
            s.reporting_start_date,
            s.reporting_end_date,
            s.reporting_fiscal_year,
            s.reporting_fiscal_period,
            s.is_quarter_format,
            s.d2_submission,
            s.publish_status_id,
            pch.history
        from
            submission as s
        inner join publish_certify_history as pch using (submission_id)
        """,
        [self.submission_id],
    )

    return dictfetchall(self.db_cursor)
def bulk_treasury_appropriation_account_tas_lookup(rows, db_cursor):
    # Eliminate nulls, TAS we already know about, and remove duplicates.
    tas_lookup_ids = tuple(set(r["tas_id"] for r in rows if (r["tas_id"] and r["tas_id"] not in TAS_ID_TO_ACCOUNT)))
    if not tas_lookup_ids:
        return

    db_cursor.execute(
        """
        select distinct
            account_num,
            allocation_transfer_agency,
            agency_identifier,
            availability_type_code,
            beginning_period_of_availa,
            ending_period_of_availabil,
            main_account_code,
            sub_account_code
        from
            tas_lookup
        where
            account_num in %s and
            (financial_indicator2 != 'F' or financial_indicator2 is null)
        """,
        [tas_lookup_ids],
    )
    tas_data = dictfetchall(db_cursor)

    tas_rendering_labels = {
        tas["account_num"]: TreasuryAppropriationAccount.generate_tas_rendering_label(
            ata=tas["allocation_transfer_agency"],
            aid=tas["agency_identifier"],
            typecode=tas["availability_type_code"],
            bpoa=tas["beginning_period_of_availa"],
            epoa=tas["ending_period_of_availabil"],
            mac=tas["main_account_code"],
            sub=tas["sub_account_code"],
        )
        for tas in tas_data
    }

    taa_objects = {
        taa.tas_rendering_label: taa
        for taa in TreasuryAppropriationAccount.objects.filter(tas_rendering_label__in=tas_rendering_labels.values())
    }

    TAS_ID_TO_ACCOUNT.update({
        tid: (taa_objects.get(tas_rendering_labels.get(tid)), tas_rendering_labels.get(tid))
        for tid in tas_lookup_ids
    })
def handle(self, *args, **options):
    # Grab the data broker database connections
    if not options['test']:
        try:
            db_conn = connections['data_broker']
            db_cursor = db_conn.cursor()
        except Exception as err:
            logger.critical('Could not connect to database. Is DATA_BROKER_DATABASE_URL set?')
            logger.critical(err)
            return
    else:
        db_cursor = PhonyCursor()

    ds_cursor = connection.cursor()

    logger.info('Creating a temporary Office table copied from the Broker...')
    db_cursor.execute('SELECT office_name, office_code FROM office')
    all_offices = dictfetchall(db_cursor)

    all_offices_list = []
    for o in all_offices:
        office_name = o['office_name'].replace("'", "''")
        office_code = o['office_code'].replace("'", "''")
        all_offices_list.append("('" + office_name + "','" + office_code + "')")
    all_offices_str = ', '.join(all_offices_list)

    ds_cursor.execute('CREATE TABLE temp_broker_office (office_name TEXT, office_code TEXT)')
    ds_cursor.execute('INSERT INTO temp_broker_office (office_name, office_code) VALUES ' + all_offices_str)

    logger.info('Deriving FABS awarding_office_names with awarding_office_codes from the temporary Office table...')
    ds_cursor.execute(
        "UPDATE transaction_fabs AS t_fabs "
        "SET awarding_office_name = office.office_name "
        "FROM temp_broker_office AS office "
        "WHERE t_fabs.awarding_office_code = office.office_code "
        "    AND t_fabs.action_date >= '2018-10-01' "
        "    AND t_fabs.awarding_office_name IS NULL "
        "    AND t_fabs.awarding_office_code IS NOT NULL")
    logger.info(ds_cursor.rowcount)
    # logger.info('Made changes to {} records'.format(ds_cursor.results))

    logger.info('Deriving FABS funding_office_names with funding_office_codes from the temporary Office table...')
    ds_cursor.execute(
        "UPDATE transaction_fabs AS t_fabs "
        "SET funding_office_name = office.office_name "
        "FROM temp_broker_office AS office "
        "WHERE t_fabs.funding_office_code = office.office_code "
        "    AND t_fabs.action_date >= '2018-10-01' "
        "    AND t_fabs.funding_office_name IS NULL "
        "    AND t_fabs.funding_office_code IS NOT NULL")
    logger.info(ds_cursor.rowcount)
    # logger.info('Made changes to {} records'.format(ds_cursor.results))

    logger.info('Dropping temporary Office table...')
    ds_cursor.execute('DROP TABLE temp_broker_office')

    logger.info('Finished derivations.')
def generate_schedules_from_broker(self):
    logger.info("Creating broker cursor")
    broker_cursor = connections["data_broker"].cursor()

    logger.info("Running MONTH_SCHEDULE_SQL")
    broker_cursor.execute(MONTH_SCHEDULE_SQL)

    logger.info("Getting month schedule values from cursor")
    month_schedule_values = dictfetchall(broker_cursor)

    logger.info("Running QUARTER_SCHEDULE_SQL")
    broker_cursor.execute(QUARTER_SCHEDULE_SQL)

    logger.info("Getting quarter schedule values from cursor")
    quarter_schedule_values = dictfetchall(broker_cursor)

    submission_schedule_objs = [DABSSubmissionWindowSchedule(**values) for values in month_schedule_values]
    submission_schedule_objs += [DABSSubmissionWindowSchedule(**values) for values in quarter_schedule_values]

    return submission_schedule_objs
def fetch_fpds_data_generator(dap_uid_list):
    start_time = datetime.now()
    db_cursor = connections["data_broker"].cursor()

    db_query = "SELECT * FROM detached_award_procurement WHERE detached_award_procurement_id IN ({});"

    total_uid_count = len(dap_uid_list)

    for i in range(0, total_uid_count, BATCH_FETCH_SIZE):
        max_index = i + BATCH_FETCH_SIZE if i + BATCH_FETCH_SIZE < total_uid_count else total_uid_count
        fpds_ids_batch = dap_uid_list[i:max_index]

        log_msg = "[{}] Fetching {}-{} out of {} records from broker"
        logger.info(log_msg.format(datetime.now() - start_time, i, max_index, total_uid_count))

        db_cursor.execute(db_query.format(",".join(str(id) for id in fpds_ids_batch)))
        yield dictfetchall(db_cursor)  # this returns an OrderedDict
def handle(self, *args, **options):
    logger.info('Creating broker cursor')
    broker_cursor = connections['data_broker'].cursor()

    logger.info('Running TOTAL_OBLIGATION_SQL')
    broker_cursor.execute(TOTAL_OBLIGATION_SQL)

    logger.info('Getting total obligation values from cursor')
    total_obligation_values = dictfetchall(broker_cursor)

    logger.info('Deleting all existing GTAS total obligation records in website')
    GTASTotalObligation.objects.all().delete()

    logger.info('Inserting GTAS total obligations records into website')
    total_obligation_objs = [GTASTotalObligation(**values) for values in total_obligation_values]
    GTASTotalObligation.objects.bulk_create(total_obligation_objs)

    logger.info('GTAS loader finished successfully!')
def broker_data(self, db_cursor, table_name, options):
    """Applies user-selected filters and gets rows from appropriate broker-side table"""
    filter_sql = []
    filter_values = []
    for (column, filter) in (
        ('action_date_begin', ' AND (action_date IS NOT NULL) AND CAST(action_date AS DATE) >= %s'),
        ('action_date_end', ' AND (action_date IS NOT NULL) AND CAST(action_date AS DATE) <= %s'),
        ('cgac', ' AND awarding_agency_code = %s'),
    ):
        if options[column]:
            filter_sql.append(filter)
            filter_values.append(options[column])
    filter_sql = "\n".join(filter_sql)
    sql = 'SELECT * FROM {} WHERE true {}'.format(table_name, filter_sql)
    db_cursor.execute(sql, filter_values)
    results = dictfetchall(db_cursor)
    logger.info('Acquired {}, there are {} rows.'.format(table_name, len(results)))
    return results
def fetch_fpds_data_generator(dap_uid_list):
    start_time = datetime.now()
    db_cursor = connections["data_broker"].cursor()

    db_query = "SELECT * FROM detached_award_procurement WHERE detached_award_procurement_id IN ({});"

    total_uid_count = len(dap_uid_list)

    for i in range(0, total_uid_count, BATCH_FETCH_SIZE):
        max_index = i + BATCH_FETCH_SIZE if i + BATCH_FETCH_SIZE < total_uid_count else total_uid_count
        fpds_ids_batch = dap_uid_list[i:max_index]

        log_msg = "[{}] Fetching {}-{} out of {} records from broker"
        logger.info(log_msg.format(datetime.now() - start_time, i + 1, max_index, total_uid_count))

        db_cursor.execute(db_query.format(",".join(str(id) for id in fpds_ids_batch)))
        yield dictfetchall(db_cursor)  # this returns an OrderedDict
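
# Hypothetical usage sketch (not from the source): each iteration of the batch generators above
# yields one list of Broker rows as dicts, so a caller can process BATCH_FETCH_SIZE records at a
# time without holding the full result set in memory. `upsert_fpds_batch` is a made-up name for
# whatever per-batch processing the caller does.
def upsert_fpds_batch(rows):
    ...  # e.g. transform and bulk-insert the batch into the website database


for batch in fetch_fpds_data_generator(dap_uid_list=[101, 102, 103]):
    upsert_fpds_batch(batch)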
def fetch_fabs_data_generator(dap_uid_list):
    db_cursor = connections["data_broker"].cursor()
    db_query = """
        SELECT * FROM published_award_financial_assistance
        WHERE published_award_financial_assistance_id IN %s;
    """

    total_uid_count = len(dap_uid_list)

    for i in range(0, total_uid_count, BATCH_FETCH_SIZE):
        start_time = time.perf_counter()
        max_index = i + BATCH_FETCH_SIZE if i + BATCH_FETCH_SIZE < total_uid_count else total_uid_count
        fabs_ids_batch = dap_uid_list[i:max_index]

        log_msg = "Fetching {}-{} out of {} records from broker"
        logger.info(log_msg.format(i + 1, max_index, total_uid_count))

        db_cursor.execute(db_query, [tuple(fabs_ids_batch)])
        logger.info("Fetching records took {:.2f}s".format(time.perf_counter() - start_time))
        yield dictfetchall(db_cursor)
def load_executive_compensation(db_cursor, duns_list=None):
    """
    Loads File E from the broker. db_cursor should be the db_cursor for Broker
    """
    if duns_list is None:
        duns_list = list(
            set(
                LegalEntity.objects.all()
                .exclude(recipient_unique_id__isnull=True)
                .values_list("recipient_unique_id", flat=True)
            )
        )

    duns_list = [str(x) for x in duns_list]

    # File E
    db_cursor.execute(FILE_E_QUERY, [tuple(duns_list)])
    e_data = dictfetchall(db_cursor)
    logger.info("Updating Executive Compensation, entries: {}".format(len(e_data)))

    for row in e_data:
        leo_update_dict = {
            "officer_1_name": row['high_comp_officer1_full_na'],
            "officer_1_amount": row['high_comp_officer1_amount'],
            "officer_2_name": row['high_comp_officer2_full_na'],
            "officer_2_amount": row['high_comp_officer2_amount'],
            "officer_3_name": row['high_comp_officer3_full_na'],
            "officer_3_amount": row['high_comp_officer3_amount'],
            "officer_4_name": row['high_comp_officer4_full_na'],
            "officer_4_amount": row['high_comp_officer4_amount'],
            "officer_5_name": row['high_comp_officer5_full_na'],
            "officer_5_amount": row['high_comp_officer5_amount'],
        }

        leo = LegalEntityOfficers.objects.get(legal_entity__recipient_unique_id=row['awardee_or_recipient_uniqu'])
        for attr, value in leo_update_dict.items():
            if value == "":
                value = None
            setattr(leo, attr, value)
        leo.save()
def handle(self, *args, **options):
    logger.info('Creating broker cursor')
    broker_cursor = connections['data_broker'].cursor()

    logger.info('Running TOTAL_OBLIGATION_SQL')
    broker_cursor.execute(TOTAL_OBLIGATION_SQL)

    logger.info('Getting total obligation values from cursor')
    total_obligation_values = dictfetchall(broker_cursor)

    logger.info('Deleting all existing GTAS total obligation records in website')
    GTASTotalObligation.objects.all().delete()

    logger.info('Inserting GTAS total obligations records into website')
    total_obligation_objs = [GTASTotalObligation(**values) for values in total_obligation_values]
    GTASTotalObligation.objects.bulk_create(total_obligation_objs)

    logger.info('GTAS loader finished successfully!')
def fetch_fabs_data_generator(dap_uid_list):
    db_cursor = connections["data_broker"].cursor()
    db_query = """
        SELECT * FROM published_award_financial_assistance
        WHERE published_award_financial_assistance_id IN %s;
    """

    total_uid_count = len(dap_uid_list)

    for i in range(0, total_uid_count, BATCH_FETCH_SIZE):
        start_time = time.perf_counter()
        max_index = i + BATCH_FETCH_SIZE if i + BATCH_FETCH_SIZE < total_uid_count else total_uid_count
        fabs_ids_batch = dap_uid_list[i:max_index]

        log_msg = "Fetching {}-{} out of {} records from broker"
        logger.info(log_msg.format(i + 1, max_index, total_uid_count))

        db_cursor.execute(db_query, [tuple(fabs_ids_batch)])
        logger.info("Fetching records took {:.2f}s".format(time.perf_counter() - start_time))
        yield dictfetchall(db_cursor)
def get_fabs_data(self, db_cursor, fiscal_year=None, to_insert=None):
    query = 'SELECT * FROM published_award_financial_assistance WHERE is_active=TRUE'
    arguments = []

    if to_insert:
        query += ' AND published_award_financial_assistance_id IN %s'
        arguments += [tuple(to_insert)]

    if fiscal_year:
        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)
        query += ' AND action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin, fy_end]

    query += ' ORDER BY published_award_financial_assistance_id'

    logger.info("Executing select query on Broker DB")
    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    return dictfetchall(db_cursor)
def handle(self, *args, **options): logger.info("Creating broker cursor") broker_cursor = connections["data_broker"].cursor() logger.info("Running TOTAL_OBLIGATION_SQL") broker_cursor.execute(self.broker_fetch_sql()) logger.info("Getting total obligation values from cursor") total_obligation_values = dictfetchall(broker_cursor) logger.info( "Deleting all existing GTAS total obligation records in website") GTASSF133Balances.objects.all().delete() logger.info("Inserting GTAS total obligations records into website") total_obligation_objs = [ GTASSF133Balances(**values) for values in total_obligation_values ] GTASSF133Balances.objects.bulk_create(total_obligation_objs) self._execute_dml_sql(self.tas_fk_sql(), "Populating TAS foreign keys") logger.info("GTAS loader finished successfully!")
def process_data(self):
    broker_cursor = connections["data_broker"].cursor()

    logger.info("Extracting data from Broker")
    broker_cursor.execute(self.broker_fetch_sql)
    total_obligation_values = dictfetchall(broker_cursor)

    logger.info("Deleting all existing GTAS total obligation records in website")
    deletes = GTASSF133Balances.objects.all().delete()
    logger.info(f"Deleted {deletes[0]:,} records")

    logger.info("Transforming new GTAS records")
    total_obligation_objs = [GTASSF133Balances(**values) for values in total_obligation_values]

    logger.info("Loading new GTAS records into database")
    new_rec_count = len(GTASSF133Balances.objects.bulk_create(total_obligation_objs))
    logger.info(f"Loaded: {new_rec_count:,} records")

    load_rec = self._execute_dml_sql(self.tas_fk_sql, "Populating TAS foreign keys")
    logger.info(f"Set {load_rec:,} TAS FKs in GTAS table, {new_rec_count - load_rec:,} NULLs")

    delete_rec = self._execute_dml_sql(self.financing_account_sql, "Drop Financing Account TAS")
    logger.info(f"Deleted {delete_rec:,} records in GTAS table due to invalid TAS")

    logger.info("Committing transaction to database")
def get_award_data(db_cursor, award_type, max_id):
    """
    Gets data for all new awards from broker with ID greater than the ones already stored
    for the given award type
    """
    query_columns = ['internal_id']

    # we need different columns depending on if it's a procurement or a grant
    if award_type == 'procurement':
        query_columns.extend(['contract_number', 'idv_reference_number', 'contracting_office_aid',
                              'contract_agency_code', 'contract_idv_agency_code'])
    else:
        # TODO contracting_office_aid equivalent? Do we even need it?
        query_columns.extend(['fain'])

    query = "SELECT " + ",".join(query_columns) + " FROM fsrs_" + award_type + \
            " WHERE id > " + str(max_id) + " ORDER BY id"

    db_cursor.execute(query)

    logger.info("Running dictfetchall on db_cursor")
    return dictfetchall(db_cursor)
def fetch_fabs_data_generator(dap_uid_list):
    db_cursor = connection.cursor()
    db_query = """
        SELECT * FROM source_assistance_transaction
        WHERE published_award_financial_assistance_id IN %s;
    """

    total_uid_count = len(dap_uid_list)

    for i in range(0, total_uid_count, BATCH_FETCH_SIZE):
        start_time = time.perf_counter()
        max_index = i + BATCH_FETCH_SIZE if i + BATCH_FETCH_SIZE < total_uid_count else total_uid_count
        fabs_ids_batch = dap_uid_list[i:max_index]

        logger.info(f"Fetching {i + 1}-{max_index} out of {total_uid_count} records from source table")

        db_cursor.execute(db_query, [tuple(fabs_ids_batch)])
        logger.info("Fetching records took {:.2f}s".format(time.perf_counter() - start_time))
        yield dictfetchall(db_cursor)
def load_subawards(submission_attributes, db_cursor):
    """
    Loads File F from the broker. db_cursor should be the db_cursor for Broker
    """
    # A list of award id's to update the subaward accounts and totals on
    award_ids_to_update = set()

    # Get a list of PIIDs from this submission
    awards_for_sub = Award.objects.filter(transaction__submission=submission_attributes).distinct()
    piids = list(awards_for_sub.values_list("piid", flat=True))
    fains = list(awards_for_sub.values_list("fain", flat=True))

    # This allows us to handle an empty list in the SQL without changing the query
    piids.append(None)
    fains.append(None)

    # D1 File F
    db_cursor.execute(D1_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(piids)])
    d1_f_data = dictfetchall(db_cursor)
    logger.info("Creating D1 F File Entries (Subcontracts): {}".format(len(d1_f_data)))
    d1_create_count = 0
    d1_update_count = 0
    d1_empty_count = 0

    for row in d1_f_data:
        if row['subcontract_num'] is None:
            if row['id'] is not None and row['subcontract_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d1_empty_count += 1
            continue

        # Get the agency
        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn(
                "Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(
                    row['subcontract_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-contract to
        # We perform this lookup by finding the Award containing a transaction with
        # a matching parent award id, piid, and submission attributes
        award = Award.objects.filter(
            awarding_agency=agency,
            transaction__submission=submission_attributes,
            transaction__contract_data__piid=row['piid'],
            transaction__contract_data__isnull=False,
            transaction__contract_data__parent_award_id=row['parent_award_id']
        ).distinct().order_by("-date_signed").first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn(
                "Subcontract number {} cannot find matching award with piid {}, parent_award_id {}; "
                "skipping...".format(row['subcontract_num'], row['piid'], row['parent_award_id']))
            continue

        award_ids_to_update.add(award.id)

        # Find the recipient by looking up by duns
        recipient, created = LegalEntity.get_or_create_by_duns(duns=row['duns'])

        if created:
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.recipient_name = row['company_name']
            recipient.location = get_or_create_location(row, location_d1_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        d1_f_dict = {
            'award': award,
            'recipient': recipient,
            'submission': submission_attributes,
            'data_source': "DBR",
            'cfda': None,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subcontract_num'],
            'amount': row['subcontract_amount'],
            'description': row['overall_description'],
            'recovery_model_question1': row['recovery_model_q1'],
            'recovery_model_question2': row['recovery_model_q2'],
            'action_date': row['subcontract_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'naics': row['naics'],
            'naics_description': row['naics_description'],
        }

        # Create the subaward
        subaward, created = Subaward.objects.update_or_create(
            subaward_number=row['subcontract_num'], award=award, defaults=d1_f_dict)
        if created:
            d1_create_count += 1
        else:
            d1_update_count += 1

    # D2 File F
    db_cursor.execute(D2_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(fains)])
    d2_f_data = dictfetchall(db_cursor)
    logger.info("Creating D2 F File Entries (Subawards): {}".format(len(d2_f_data)))
    d2_create_count = 0
    d2_update_count = 0
    d2_empty_count = 0

    for row in d2_f_data:
        if row['subaward_num'] is None:
            if row['id'] is not None and row['subaward_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d2_empty_count += 1
            continue

        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn(
                "Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(
                    row['subaward_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-award to
        # We perform this lookup by finding the Award containing a transaction with
        # a matching fain and submission. If this fails, try submission and uri
        award = None  # reset any award matched on a previous iteration before trying FAIN/URI
        if row['fain'] and len(row['fain']) > 0:
            award = Award.objects.filter(
                awarding_agency=agency,
                transaction__submission=submission_attributes,
                transaction__assistance_data__isnull=False,
                transaction__assistance_data__fain=row['fain']
            ).distinct().order_by("-date_signed").first()

        # Couldn't find a match on FAIN, try URI if it exists
        if not award and row['uri'] and len(row['uri']) > 0:
            award = Award.objects.filter(
                awarding_agency=agency,
                transaction__submission=submission_attributes,
                transaction__assistance_data__isnull=False,
                transaction__assistance_data__uri=row['uri']
            ).distinct().first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn("Subaward number {} cannot find matching award with fain {}, uri {}; skipping...".format(
                row['subaward_num'], row['fain'], row['uri']))
            continue

        award_ids_to_update.add(award.id)

        # Find the recipient by looking up by duns
        recipient, created = LegalEntity.get_or_create_by_duns(duns=row['duns'])

        if created:
            recipient_name = row['awardee_name']
            if recipient_name is None:
                recipient_name = row['awardee_or_recipient_legal']
            if recipient_name is None:
                recipient_name = ""

            recipient.recipient_name = recipient_name
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.location = get_or_create_location(row, location_d2_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        # Get CFDA Program
        cfda = Cfda.objects.filter(program_number=row['cfda_number']).first()

        d2_f_dict = {
            'award': award,
            'recipient': recipient,
            'submission': submission_attributes,
            'data_source': "DBR",
            'cfda': cfda,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subaward_num'],
            'amount': row['subaward_amount'],
            'description': row['project_description'],
            'recovery_model_question1': row['compensation_q1'],
            'recovery_model_question2': row['compensation_q2'],
            'action_date': row['subaward_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'naics': None,
            'naics_description': None,
        }

        # Create the subaward
        subaward, created = Subaward.objects.update_or_create(
            subaward_number=row['subaward_num'], award=award, defaults=d2_f_dict)
        if created:
            d2_create_count += 1
        else:
            d2_update_count += 1

    # Update Award objects with subaward aggregates
    update_award_subawards(tuple(award_ids_to_update))

    logger.info(
        """Submission {}
           Subcontracts created: {}
           Subcontracts updated: {}
           Empty subcontract rows: {}
           Subawards created: {}
           Subawards updated: {}
           Empty subaward rows: {}""".format(submission_attributes.broker_submission_id,
                                             d1_create_count,
                                             d1_update_count,
                                             d1_empty_count,
                                             d2_create_count,
                                             d2_update_count,
                                             d2_empty_count))
def get_file_b(submission_attributes, db_cursor):
    """
    Get broker File B data for a specific submission.
    This function was added as a workaround for the fact that a few agencies (two, as of April, 2017: DOI and ACHP)
    submit multiple File B records for the same object class. These "dupes" come in as the same 4 digit object
    class code but with one of the direct reimbursable flags set to NULL.
    From our perspective, this is a duplicate, because we get our D/R info from the 1st digit of the object class
    when it's four digits.
    Thus, this function examines the File B data for a given submission. If it has the issue of "duplicate"
    object classes, it will squash the offending records together so that all financial totals are reported as a
    single object class/program activity/TAS record as expected.
    If the broker validations change to prohibit this pattern in the data, this intervening function will no longer
    be necessary; we can go back to selecting * from the broker's File B data.
    Args:
        submission_attributes: submission object currently being loaded
        db_cursor: db connection info
    """
    submission_id = submission_attributes.broker_submission_id

    # does this file B have the dupe object class edge case?
    check_dupe_oc = (
        "SELECT count(*) "
        "FROM certified_object_class_program_activity "
        "WHERE submission_id = %s "
        "AND length(object_class) = 4 "
        "GROUP BY tas_id, program_activity_code, object_class "
        "HAVING COUNT(*) > 1"
    )
    db_cursor.execute(check_dupe_oc, [submission_id])
    dupe_oc_count = len(dictfetchall(db_cursor))

    if dupe_oc_count == 0:
        # there are no object class duplicates, so proceed as usual
        db_cursor.execute(
            "SELECT * FROM certified_object_class_program_activity WHERE submission_id = %s", [submission_id])
    else:
        # file b contains at least one case of duplicate 4 digit object classes for the same program activity/tas,
        # so combine the records in question
        combine_dupe_oc = (
            "SELECT "
            "submission_id, "
            "job_id, "
            "agency_identifier, "
            "allocation_transfer_agency, "
            "availability_type_code, "
            "beginning_period_of_availa, "
            "ending_period_of_availabil, "
            "main_account_code, "
            "RIGHT(object_class, 3) AS object_class, "
            "CASE WHEN length(object_class) = 4 AND LEFT(object_class, 1) = '1' THEN 'D' "
            "WHEN length(object_class) = 4 AND LEFT(object_class, 1) = '2' THEN 'R' "
            "ELSE by_direct_reimbursable_fun END AS by_direct_reimbursable_fun, "
            "tas, "
            "tas_id, "
            "program_activity_code, "
            "program_activity_name, "
            "sub_account_code, "
            "SUM(deobligations_recov_by_pro_cpe) AS deobligations_recov_by_pro_cpe, "
            "SUM(gross_outlay_amount_by_pro_cpe) AS gross_outlay_amount_by_pro_cpe, "
            "SUM(gross_outlay_amount_by_pro_fyb) AS gross_outlay_amount_by_pro_fyb, "
            "SUM(gross_outlays_delivered_or_cpe) AS gross_outlays_delivered_or_cpe, "
            "SUM(gross_outlays_delivered_or_fyb) AS gross_outlays_delivered_or_fyb, "
            "SUM(gross_outlays_undelivered_cpe) AS gross_outlays_undelivered_cpe, "
            "SUM(gross_outlays_undelivered_fyb) AS gross_outlays_undelivered_fyb, "
            "SUM(obligations_delivered_orde_cpe) AS obligations_delivered_orde_cpe, "
            "SUM(obligations_delivered_orde_fyb) AS obligations_delivered_orde_fyb, "
            "SUM(obligations_incurred_by_pr_cpe) AS obligations_incurred_by_pr_cpe, "
            "SUM(obligations_undelivered_or_cpe) AS obligations_undelivered_or_cpe, "
            "SUM(obligations_undelivered_or_fyb) AS obligations_undelivered_or_fyb, "
            "SUM(ussgl480100_undelivered_or_cpe) AS ussgl480100_undelivered_or_cpe, "
            "SUM(ussgl480100_undelivered_or_fyb) AS ussgl480100_undelivered_or_fyb, "
            "SUM(ussgl480200_undelivered_or_cpe) AS ussgl480200_undelivered_or_cpe, "
            "SUM(ussgl480200_undelivered_or_fyb) AS ussgl480200_undelivered_or_fyb, "
            "SUM(ussgl483100_undelivered_or_cpe) AS ussgl483100_undelivered_or_cpe, "
            "SUM(ussgl483200_undelivered_or_cpe) AS ussgl483200_undelivered_or_cpe, "
            "SUM(ussgl487100_downward_adjus_cpe) AS ussgl487100_downward_adjus_cpe, "
            "SUM(ussgl487200_downward_adjus_cpe) AS ussgl487200_downward_adjus_cpe, "
            "SUM(ussgl488100_upward_adjustm_cpe) AS ussgl488100_upward_adjustm_cpe, "
            "SUM(ussgl488200_upward_adjustm_cpe) AS ussgl488200_upward_adjustm_cpe, "
            "SUM(ussgl490100_delivered_orde_cpe) AS ussgl490100_delivered_orde_cpe, "
            "SUM(ussgl490100_delivered_orde_fyb) AS ussgl490100_delivered_orde_fyb, "
            "SUM(ussgl490200_delivered_orde_cpe) AS ussgl490200_delivered_orde_cpe, "
            "SUM(ussgl490800_authority_outl_cpe) AS ussgl490800_authority_outl_cpe, "
            "SUM(ussgl490800_authority_outl_fyb) AS ussgl490800_authority_outl_fyb, "
            "SUM(ussgl493100_delivered_orde_cpe) AS ussgl493100_delivered_orde_cpe, "
            "SUM(ussgl497100_downward_adjus_cpe) AS ussgl497100_downward_adjus_cpe, "
            "SUM(ussgl497200_downward_adjus_cpe) AS ussgl497200_downward_adjus_cpe, "
            "SUM(ussgl498100_upward_adjustm_cpe) AS ussgl498100_upward_adjustm_cpe, "
            "SUM(ussgl498200_upward_adjustm_cpe) AS ussgl498200_upward_adjustm_cpe "
            "FROM certified_object_class_program_activity "
            "WHERE submission_id = %s "
            "GROUP BY "
            "submission_id, "
            "job_id, "
            "agency_identifier, "
            "allocation_transfer_agency, "
            "availability_type_code, "
            "beginning_period_of_availa, "
            "ending_period_of_availabil, "
            "main_account_code, "
            "RIGHT(object_class, 3), "
            "CASE WHEN length(object_class) = 4 AND LEFT(object_class, 1) = '1' THEN 'D' "
            "WHEN length(object_class) = 4 AND LEFT(object_class, 1) = '2' THEN 'R' "
            "ELSE by_direct_reimbursable_fun END, "
            "program_activity_code, "
            "program_activity_name, "
            "sub_account_code, "
            "tas, "
            "tas_id"
        )

        logger.info(
            "Found {} duplicated File B 4 digit object codes in submission {}. "
            "Aggregating financial values.".format(dupe_oc_count, submission_id))

        # we have at least one instance of duplicated 4 digit object classes so aggregate the financial values together
        db_cursor.execute(combine_dupe_oc, [submission_id])

    data = dictfetchall(db_cursor)
    return data
def handle_loading(self, db_cursor, *args, **options):

    def signal_handler(signal, frame):
        transaction.set_rollback(True)
        raise Exception('Received interrupt signal. Aborting...')

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    submission_id = options['submission_id'][0]

    logger.info('Getting submission {} from broker...'.format(submission_id))
    db_cursor.execute('SELECT * FROM submission WHERE submission_id = %s', [submission_id])
    submission_data = dictfetchall(db_cursor)
    logger.info('Finished getting submission {} from broker'.format(submission_id))

    if len(submission_data) == 0:
        raise CommandError('Could not find submission with id ' + str(submission_id))
    elif len(submission_data) > 1:
        raise CommandError('Found multiple submissions with id ' + str(submission_id))

    submission_data = submission_data[0].copy()
    broker_submission_id = submission_data['submission_id']
    del submission_data['submission_id']  # We use broker_submission_id, submission_id is our own PK
    submission_attributes = get_submission_attributes(broker_submission_id, submission_data)

    logger.info('Getting File A data')
    db_cursor.execute('SELECT * FROM certified_appropriation WHERE submission_id = %s', [submission_id])
    appropriation_data = dictfetchall(db_cursor)
    logger.info('Acquired File A (appropriation) data for ' + str(submission_id) + ', there are ' +
                str(len(appropriation_data)) + ' rows.')
    logger.info('Loading File A data')
    start_time = datetime.now()
    load_file_a(submission_attributes, appropriation_data, db_cursor)
    logger.info('Finished loading File A data, took {}'.format(datetime.now() - start_time))

    logger.info('Getting File B data')
    prg_act_obj_cls_data = get_file_b(submission_attributes, db_cursor)
    logger.info('Acquired File B (program activity object class) data for ' + str(submission_id) +
                ', there are ' + str(len(prg_act_obj_cls_data)) + ' rows.')
    logger.info('Loading File B data')
    start_time = datetime.now()
    load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor)
    logger.info('Finished loading File B data, took {}'.format(datetime.now() - start_time))

    logger.info('Getting File C data')
    # we don't have sub-tier agency info, so we'll do our best
    # to match them to the more specific award records
    award_financial_query = 'SELECT * FROM certified_award_financial WHERE submission_id = {0}'.format(submission_id)
    if isinstance(db_cursor, PhonyCursor):
        # spoofed data for test
        award_financial_frame = pd.DataFrame(db_cursor.db_responses[award_financial_query])
    else:
        # real data
        award_financial_frame = pd.read_sql(award_financial_query, connections['data_broker'])
    logger.info('Acquired File C (award financial) data for {}, there are {} rows.'.format(
        submission_id, award_financial_frame.shape[0]))
    logger.info('Loading File C data')
    start_time = datetime.now()
    awards_touched = load_file_c(submission_attributes, db_cursor, award_financial_frame)
    logger.info('Finished loading File C data, took {}'.format(datetime.now() - start_time))

    if not options['nosubawards']:
        try:
            start_time = datetime.now()
            logger.info('Loading subaward data...')
            load_subawards(submission_attributes, awards_touched, db_cursor)
            logger.info('Finished loading subaward data, took {}'.format(datetime.now() - start_time))
        except Exception:
            logger.warning("Error loading subawards for this submission")
    else:
        logger.info('Skipping subawards due to flags...')

    # Once all the files have been processed, run any global cleanup/post-load tasks.
    # Cleanup not specific to this submission is run in the `.handle` method
    logger.info('Successfully loaded broker submission {}.'.format(options['submission_id'][0]))
def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000):

    # logger.info("Getting IDs for what's currently in the DB...")
    # current_ids = TransactionFABS.objects
    #
    # if fiscal_year:
    #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
    #
    # current_ids = current_ids.values_list('published_award_financial_assistance_id', flat=True)

    query = "SELECT * FROM published_award_financial_assistance"
    arguments = []

    if fiscal_year:
        if arguments:
            query += " AND"
        else:
            query += " WHERE"
        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)
        query += ' action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin]
        arguments += [fy_end]
    query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s'
    arguments += [limit, (page - 1) * limit]

    logger.info("Executing query on Broker DB => " + query % tuple(arguments))

    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    award_financial_assistance_data = dictfetchall(db_cursor)

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    logger.info("Getting total rows")
    # rows_loaded = len(current_ids)
    total_rows = len(award_financial_assistance_data)  # - rows_loaded

    logger.info("Processing " + str(total_rows) + " rows of assistance data")

    # skip_count = 0

    # ROW ITERATION STARTS HERE

    lel_bulk = []
    pop_bulk = []
    legal_entity_bulk = []
    award_bulk = []

    transaction_assistance_bulk = []
    transaction_normalized_bulk = []

    logger.info('Getting legal entity location objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        # Recipient flag is true for LeL
        legal_entity_location = get_or_create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True}, save=False
        )

        lel_bulk.append(legal_entity_location)

    logger.info('Bulk creating {} legal entity location rows...'.format(len(lel_bulk)))
    try:
        Location.objects.bulk_create(lel_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    logger.info('Getting place of performance objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        # Place of Performance flag is true for PoP
        pop_location = get_or_create_location(
            place_of_performance_field_map, row, {"place_of_performance_flag": True}, save=False
        )

        pop_bulk.append(pop_location)

    logger.info('Bulk creating {} place of performance rows...'.format(len(pop_bulk)))
    try:
        Location.objects.bulk_create(pop_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    logger.info('Getting legal entity objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        recipient_name = row.get('awardee_or_recipient_legal', '')

        legal_entity = LegalEntity.objects.filter(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name
        ).first()

        if legal_entity is None:
            legal_entity = LegalEntity(
                recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name)

        legal_entity_value_map = {
            "location": lel_bulk[index - 1],
        }
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=False)

        legal_entity_bulk.append(legal_entity)

    logger.info('Bulk creating {} legal entity rows...'.format(len(legal_entity_bulk)))
    try:
        LegalEntity.objects.bulk_create(legal_entity_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    awarding_agency_list = []
    funding_agency_list = []

    logger.info('Getting award objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
            awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
            awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
            awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id]
            row['awarding_agency_code'] = awarding_cgac_code

        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
            funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
            if funding_subtier_agency_id is not None:
                funding_toptier_agency_id = subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                funding_cgac_code = toptier_agency_map[funding_toptier_agency_id]
            else:
                funding_cgac_code = None
            row['funding_agency_code'] = funding_cgac_code

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'],
            row["awarding_sub_tier_agency_c"]
        )
        funding_agency = Agency.get_by_toptier_subtier(
            row['funding_agency_code'],
            row["funding_sub_tier_agency_co"]
        )
        awarding_agency_list.append(awarding_agency)
        funding_agency_list.append(funding_agency)

        # award.save() is called in Award.get_or_create_summary_award by default
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            fain=row.get('fain'),
            uri=row.get('uri'),
            save=False
        )

        award_bulk.append(award)
        award_update_id_list.append(award.id)

    logger.info('Bulk creating {} award rows...'.format(len(award_bulk)))
    try:
        Award.objects.bulk_create(award_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    logger.info('Getting transaction_normalized for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        parent_txn_value_map = {
            "award": award_bulk[index - 1],
            "awarding_agency": awarding_agency_list[index - 1],
            "funding_agency": funding_agency_list[index - 1],
            "recipient": legal_entity_bulk[index - 1],
            "place_of_performance": pop_bulk[index - 1],
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction_normalized = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction_normalized.fiscal_year = fy(transaction_normalized.action_date)
        transaction_normalized_bulk.append(transaction_normalized)

    logger.info('Bulk creating {} TransactionNormalized rows...'.format(len(transaction_normalized_bulk)))
    try:
        TransactionNormalized.objects.bulk_create(transaction_normalized_bulk)
    except IntegrityError:
        logger.info('Tried and failed to insert duplicate transaction_normalized row. Continuing...')

    for index, row in enumerate(award_financial_assistance_data, 1):
        financial_assistance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            as_dict=True)

        transaction_assistance = TransactionFABS(
            transaction=transaction_normalized_bulk[index - 1], **financial_assistance_data)
        transaction_assistance_bulk.append(transaction_assistance)

    logger.info('Bulk creating TransactionFABS rows...')
    try:
        TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')
def get_file_b(submission_attributes, db_cursor): """ Get broker File B data for a specific submission. This function was added as a workaround for the fact that a few agencies (two, as of April, 2017: DOI and ACHP) submit multiple File B records for the same object class. These "dupes", come in as the same 4 digit object class code but with one of the direct reimbursable flags set to NULL. From our perspective, this is a duplicate, because we get our D/R info from the 1st digit of the object class when it's four digits. Thus, this function examines the File B data for a given submission. If it has the issue of "duplicate" object classes, it will squash the offending records together so that all financial totals are reporting as a single object class/program activity/TAS record as expected. If the broker validations change to prohibit this pattern in the data, this intervening function will no longer be necessary, we can go back to selecting * from the broker's File B data. Args: submission_attributes: submission object currently being loaded db_cursor: db connection info """ submission_id = submission_attributes.submission_id # does this file B have the dupe object class edge case? check_dupe_oc = f""" select count(*) from certified_object_class_program_activity where submission_id = %s and length(object_class) = 4 group by tas_id, program_activity_code, object_class, disaster_emergency_fund_code having count(*) > 1 """ db_cursor.execute(check_dupe_oc, [submission_id]) dupe_oc_count = len(dictfetchall(db_cursor)) if dupe_oc_count == 0: # there are no object class duplicates, so proceed as usual db_cursor.execute( "select * from certified_object_class_program_activity where submission_id = %s", [submission_id]) else: # file b contains at least one case of duplicate 4 digit object classes for the same program activity/tas, # so combine the records in question combine_dupe_oc = f""" select submission_id, job_id, agency_identifier, allocation_transfer_agency, availability_type_code, beginning_period_of_availa, ending_period_of_availabil, main_account_code, right(object_class, 3) as object_class, case when length(object_class) = 4 and left(object_class, 1) = '1' then 'D' when length(object_class) = 4 and left(object_class, 1) = '2' then 'R' else by_direct_reimbursable_fun end as by_direct_reimbursable_fun, tas, tas_id, program_activity_code, program_activity_name, sub_account_code, sum(deobligations_recov_by_pro_cpe) as deobligations_recov_by_pro_cpe, sum(gross_outlay_amount_by_pro_cpe) as gross_outlay_amount_by_pro_cpe, sum(gross_outlay_amount_by_pro_fyb) as gross_outlay_amount_by_pro_fyb, sum(gross_outlays_delivered_or_cpe) as gross_outlays_delivered_or_cpe, sum(gross_outlays_delivered_or_fyb) as gross_outlays_delivered_or_fyb, sum(gross_outlays_undelivered_cpe) as gross_outlays_undelivered_cpe, sum(gross_outlays_undelivered_fyb) as gross_outlays_undelivered_fyb, sum(obligations_delivered_orde_cpe) as obligations_delivered_orde_cpe, sum(obligations_delivered_orde_fyb) as obligations_delivered_orde_fyb, sum(obligations_incurred_by_pr_cpe) as obligations_incurred_by_pr_cpe, sum(obligations_undelivered_or_cpe) as obligations_undelivered_or_cpe, sum(obligations_undelivered_or_fyb) as obligations_undelivered_or_fyb, sum(ussgl480100_undelivered_or_cpe) as ussgl480100_undelivered_or_cpe, sum(ussgl480100_undelivered_or_fyb) as ussgl480100_undelivered_or_fyb, sum(ussgl480200_undelivered_or_cpe) as ussgl480200_undelivered_or_cpe, sum(ussgl480200_undelivered_or_fyb) as ussgl480200_undelivered_or_fyb, 
sum(ussgl483100_undelivered_or_cpe) as ussgl483100_undelivered_or_cpe, sum(ussgl483200_undelivered_or_cpe) as ussgl483200_undelivered_or_cpe, sum(ussgl487100_downward_adjus_cpe) as ussgl487100_downward_adjus_cpe, sum(ussgl487200_downward_adjus_cpe) as ussgl487200_downward_adjus_cpe, sum(ussgl488100_upward_adjustm_cpe) as ussgl488100_upward_adjustm_cpe, sum(ussgl488200_upward_adjustm_cpe) as ussgl488200_upward_adjustm_cpe, sum(ussgl490100_delivered_orde_cpe) as ussgl490100_delivered_orde_cpe, sum(ussgl490100_delivered_orde_fyb) as ussgl490100_delivered_orde_fyb, sum(ussgl490200_delivered_orde_cpe) as ussgl490200_delivered_orde_cpe, sum(ussgl490800_authority_outl_cpe) as ussgl490800_authority_outl_cpe, sum(ussgl490800_authority_outl_fyb) as ussgl490800_authority_outl_fyb, sum(ussgl493100_delivered_orde_cpe) as ussgl493100_delivered_orde_cpe, sum(ussgl497100_downward_adjus_cpe) as ussgl497100_downward_adjus_cpe, sum(ussgl497200_downward_adjus_cpe) as ussgl497200_downward_adjus_cpe, sum(ussgl498100_upward_adjustm_cpe) as ussgl498100_upward_adjustm_cpe, sum(ussgl498200_upward_adjustm_cpe) as ussgl498200_upward_adjustm_cpe, disaster_emergency_fund_code from certified_object_class_program_activity where submission_id = %s group by submission_id, job_id, agency_identifier, allocation_transfer_agency, availability_type_code, beginning_period_of_availa, ending_period_of_availabil, main_account_code, right(object_class, 3), case when length(object_class) = 4 and left(object_class, 1) = '1' then 'D' when length(object_class) = 4 and left(object_class, 1) = '2' then 'R' else by_direct_reimbursable_fun end, program_activity_code, program_activity_name, sub_account_code, tas, tas_id, disaster_emergency_fund_code """ logger.info( f"Found {dupe_oc_count:,} duplicated File B 4 digit object codes in submission {submission_id}. " f"Aggregating financial values.") # we have at least one instance of duplicated 4 digit object classes so aggregate the financial values together db_cursor.execute(combine_dupe_oc, [submission_id]) data = dictfetchall(db_cursor) return data
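The CASE expression in this query encodes a simple convention: a four digit object class carries the direct ('1') or reimbursable ('2') flag in its leading digit, and the remaining three digits are the object class proper. A small, purely illustrative Python equivalent of that mapping:

def normalize_object_class(object_class, direct_reimbursable=None):
    """Illustrative only: split a 4 digit object class into (3 digit code, D/R flag)."""
    if len(object_class) == 4:
        leading, object_class = object_class[0], object_class[1:]
        if leading == '1':
            direct_reimbursable = 'D'  # direct
        elif leading == '2':
            direct_reimbursable = 'R'  # reimbursable
    return object_class, direct_reimbursable


assert normalize_object_class('1110') == ('110', 'D')
assert normalize_object_class('2110') == ('110', 'R')
assert normalize_object_class('110', 'D') == ('110', 'D')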
def update_location_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000, save=True):

    list_of_columns = (', '.join(['piid', 'award_modification_amendme', 'legal_entity_country_code',
                                  'place_of_perform_country_c', 'legal_entity_state_code',
                                  'place_of_performance_state']))

    query = "SELECT {} FROM detached_award_procurement".format(list_of_columns)
    arguments = []

    if fiscal_year:
        # Only build the fiscal year bounds when a fiscal year was requested; computing them
        # unconditionally would raise a TypeError when fiscal_year is None.
        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)
        query += " AND" if arguments else " WHERE"
        query += ' action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin, fy_end]

    query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s'
    arguments += [limit, (page - 1) * limit]

    logger.info("Executing query on Broker DB => " + query + " with arguments " + str(arguments))

    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    procurement_data = dictfetchall(db_cursor)

    logger.info("Getting total rows")
    total_rows = len(procurement_data)

    logger.info("Processing " + str(total_rows) + " rows of procurement data")

    start_time = datetime.now()
    for index, row in enumerate(procurement_data, 1):
        with db_transaction.atomic():
            if not (index % 100):
                logger.info('D1 File Fix: Fixing row {} of {} ({})'.format(
                    str(index), str(total_rows), datetime.now() - start_time))

            transaction = TransactionNormalized.objects.filter(
                award__piid=row['piid'],
                modification_number=row['award_modification_amendme']).first()
            if not transaction:
                logger.info("Couldn't find transaction with piid ({}) and modification_number ({}). Skipping.".format(
                    row['piid'], row['award_modification_amendme']))
                continue

            if transaction.recipient and transaction.recipient.location:
                lel = transaction.recipient.location
                location_country_code = row['legal_entity_country_code']
                state_code = row['legal_entity_state_code']
                lel = update_country_code("d1", lel, location_country_code, state_code)
                lel.save()

            if transaction.place_of_performance:
                pop = transaction.place_of_performance
                location_country_code = row['place_of_perform_country_c']
                state_code = row['place_of_performance_state']
                pop = update_country_code("d1", pop, location_country_code, state_code)
                pop.save()
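The fiscal year window built here (and in the other Broker loaders) always has the same shape: FY N runs from 10/01 of year N-1 through 09/30 of year N. A hypothetical helper, shown only to make the boundaries explicit:

def fiscal_year_bounds(fiscal_year):
    """Return (begin, end) date strings for a US federal fiscal year, as used by these queries."""
    fy_begin = '10/01/' + str(fiscal_year - 1)
    fy_end = '09/30/' + str(fiscal_year)
    return fy_begin, fy_end


assert fiscal_year_bounds(2017) == ('10/01/2016', '09/30/2017')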
def update_location_transaction_assistance(db_cursor, fiscal_year=2017, page=1, limit=500000, save=True): list_of_columns = (', '.join(['fain', 'uri', 'award_modification_amendme', 'legal_entity_country_code', 'place_of_perform_country_c', 'place_of_performance_code', 'legal_entity_state_code', 'legal_entity_state_name', 'place_of_perform_state_nam'])) # get the transaction values we need # TODO: Modify cutoff date to match nightly loads query = "SELECT {} FROM published_award_financial_assistance WHERE is_active=TRUE " \ "AND updated_at < '09/20/2017'".format(list_of_columns) arguments = [] fy_begin = '10/01/' + str(fiscal_year - 1) fy_end = '09/30/' + str(fiscal_year) if fiscal_year: query += " AND" query += ' action_date::Date BETWEEN %s AND %s' arguments += [fy_begin] arguments += [fy_end] query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s' arguments += [limit, (page - 1) * limit] logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3])) db_cursor.execute(query, arguments) logger.info("Running dictfetchall on db_cursor") award_financial_assistance_data = dictfetchall(db_cursor) logger.info("Getting total rows") total_rows = len(award_financial_assistance_data) # - rows_loaded logger.info("Processing " + str(total_rows) + " rows of location data") start_time = datetime.now() trans_queryset = TransactionNormalized.objects.prefetch_related('award', 'recipient__location') for index, row in enumerate(award_financial_assistance_data, 1): if not (index % 100): logger.info('Location Fix: Fixing row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) # Could also use contract_data__fain transaction = trans_queryset.filter(award__fain=row['fain'], award__uri=row['uri'], modification_number=row['award_modification_amendme']).first() if not transaction: logger.info('Couldn\'t find transaction with fain ({}), uri({}), and modification_number({}). ' 'Skipping.'.format(row['fain'], row['uri'], row['award_modification_amendme'])) continue if transaction.recipient and transaction.recipient.location: lel = transaction.recipient.location location_country_code = row['legal_entity_country_code'] state_code = row['legal_entity_state_code'] state_name = row['legal_entity_state_name'] lel = update_country_code("d2", lel, location_country_code, state_code, state_name) lel.save() if transaction.place_of_performance: pop = transaction.place_of_performance location_country_code = row['place_of_perform_country_c'] place_of_perform_code = row['place_of_performance_code'] state_name = row['place_of_perform_state_nam'] pop = update_country_code("d2", pop, location_country_code, state_code, state_name, place_of_performance_code=place_of_perform_code) pop.save()
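These loaders slice large Broker tables with a 1-based page number and a row limit; the LIMIT/OFFSET arithmetic is easy to get wrong, so here it is in isolation (names are hypothetical):

def page_bounds(page, limit):
    """Translate a 1-based page number and page size into SQL LIMIT/OFFSET values."""
    return limit, (page - 1) * limit


assert page_bounds(1, 500000) == (500000, 0)
assert page_bounds(3, 500000) == (500000, 1000000)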
def handle_loading(self, db_cursor, *args, **options): def signal_handler(signal, frame): transaction.set_rollback(True) raise Exception('Received interrupt signal. Aborting...') signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) submission_id = options['submission_id'][0] logger.info('Getting submission {} from broker...'.format(submission_id)) db_cursor.execute('SELECT * FROM submission WHERE submission_id = %s', [submission_id]) submission_data = dictfetchall(db_cursor) logger.info('Finished getting submission {} from broker'.format(submission_id)) if len(submission_data) == 0: raise CommandError('Could not find submission with id ' + str(submission_id)) elif len(submission_data) > 1: raise CommandError('Found multiple submissions with id ' + str(submission_id)) submission_data = submission_data[0].copy() broker_submission_id = submission_data['submission_id'] del submission_data['submission_id'] # We use broker_submission_id, submission_id is our own PK submission_attributes = get_submission_attributes(broker_submission_id, submission_data) logger.info('Getting File A data') db_cursor.execute('SELECT * FROM certified_appropriation WHERE submission_id = %s', [submission_id]) appropriation_data = dictfetchall(db_cursor) logger.info('Acquired File A (appropriation) data for ' + str(submission_id) + ', there are ' + str( len(appropriation_data)) + ' rows.') logger.info('Loading File A data') start_time = datetime.now() load_file_a(submission_attributes, appropriation_data, db_cursor) logger.info('Finished loading File A data, took {}'.format(datetime.now() - start_time)) logger.info('Getting File B data') prg_act_obj_cls_data = get_file_b(submission_attributes, db_cursor) logger.info( 'Acquired File B (program activity object class) data for ' + str(submission_id) + ', there are ' + str( len(prg_act_obj_cls_data)) + ' rows.') logger.info('Loading File B data') start_time = datetime.now() load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor) logger.info('Finished loading File B data, took {}'.format(datetime.now() - start_time)) logger.info('Getting File C data') # we dont have sub-tier agency info, so we'll do our best # to match them to the more specific award records award_financial_query = 'SELECT * FROM certified_award_financial WHERE submission_id = {0}'.\ format(submission_id) if isinstance(db_cursor, PhonyCursor): # spoofed data for test award_financial_frame = pd.DataFrame(db_cursor.db_responses[award_financial_query]) else: # real data award_financial_frame = pd.read_sql(award_financial_query, connections['data_broker']) logger.info('Acquired File C (award financial) data for {}, there are {} rows.' .format(submission_id, award_financial_frame.shape[0])) logger.info('Loading File C data') start_time = datetime.now() awards_touched = load_file_c(submission_attributes, db_cursor, award_financial_frame) logger.info('Finished loading File C data, took {}'.format(datetime.now() - start_time)) if not options['nosubawards']: try: start_time = datetime.now() logger.info('Loading subaward data...') load_subawards(submission_attributes, awards_touched, db_cursor) logger.info('Finshed loading subaward data, took {}'.format(datetime.now() - start_time)) except Exception: logger.warning("Error loading subawards for this submission") else: logger.info('Skipping subawards due to flags...') # Once all the files have been processed, run any global cleanup/post-load tasks. 
    # Cleanup not specific to this submission is run in the `.handle` method
    logger.info('Successfully loaded broker submission {}.'.format(options['submission_id'][0]))
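handle_loading installs the same interrupt handling on every run: SIGINT/SIGTERM mark the enclosing Django transaction for rollback and abort. Pulled out on its own as a sketch (the function name is hypothetical):

import signal

from django.db import transaction


def install_abort_handlers():
    """Roll back the current atomic block and abort if the process is interrupted."""
    def signal_handler(signum, frame):
        transaction.set_rollback(True)
        raise Exception('Received interrupt signal. Aborting...')

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)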
def handle_loading(self, db_cursor, *args, **options): def signal_handler(signal, frame): transaction.set_rollback(True) raise Exception("Received interrupt signal. Aborting...") signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) submission_id = options["submission_id"][0] logger.info( "Getting submission {} from broker...".format(submission_id)) db_cursor.execute("SELECT * FROM submission WHERE submission_id = %s", [submission_id]) submission_data = dictfetchall(db_cursor) logger.info( "Finished getting submission {} from broker".format(submission_id)) if len(submission_data) == 0: raise CommandError("Could not find submission with id " + str(submission_id)) elif len(submission_data) > 1: raise CommandError("Found multiple submissions with id " + str(submission_id)) submission_data = submission_data[0].copy() broker_submission_id = submission_data["submission_id"] del submission_data[ "submission_id"] # We use broker_submission_id, submission_id is our own PK submission_attributes = get_submission_attributes( broker_submission_id, submission_data) logger.info("Getting File A data") db_cursor.execute( "SELECT * FROM certified_appropriation WHERE submission_id = %s", [submission_id]) appropriation_data = dictfetchall(db_cursor) logger.info("Acquired File A (appropriation) data for " + str(submission_id) + ", there are " + str(len(appropriation_data)) + " rows.") logger.info("Loading File A data") start_time = datetime.now() load_file_a(submission_attributes, appropriation_data, db_cursor) logger.info( "Finished loading File A data, took {}".format(datetime.now() - start_time)) logger.info("Getting File B data") prg_act_obj_cls_data = get_file_b(submission_attributes, db_cursor) logger.info( "Acquired File B (program activity object class) data for " + str(submission_id) + ", there are " + str(len(prg_act_obj_cls_data)) + " rows.") logger.info("Loading File B data") start_time = datetime.now() load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor) logger.info( "Finished loading File B data, took {}".format(datetime.now() - start_time)) logger.info("Getting File C data") # we dont have sub-tier agency info, so we'll do our best # to match them to the more specific award records award_financial_query = ( "SELECT * FROM certified_award_financial" f" WHERE submission_id = {submission_id}" " AND transaction_obligated_amou IS NOT NULL AND transaction_obligated_amou != 0" ) award_financial_frame = pd.read_sql(award_financial_query, connections["data_broker"]) logger.info( "Acquired File C (award financial) data for {}, there are {} rows." .format(submission_id, award_financial_frame.shape[0])) logger.info("Loading File C data") start_time = datetime.now() load_file_c(submission_attributes, db_cursor, award_financial_frame) logger.info( "Finished loading File C data, took {}".format(datetime.now() - start_time)) # Once all the files have been processed, run any global cleanup/post-load tasks. # Cleanup not specific to this submission is run in the `.handle` method logger.info("Successfully loaded broker submission {}.".format( options["submission_id"][0]))
def get_file_b(submission_attributes, db_cursor): """ Get broker File B data for a specific submission. This function was added as a workaround for the fact that a few agencies (two, as of April, 2017: DOI and ACHP) submit multiple File B records for the same object class. These "dupes", come in as the same 4 digit object class code but with one of the direct reimbursable flags set to NULL. From our perspective, this is a duplicate, because we get our D/R info from the 1st digit of the object class when it's four digits. Thus, this function examines the File B data for a given submission. If it has the issue of "duplicate" object classes, it will squash the offending records together so that all financial totals are reporting as a single object class/program activity/TAS record as expected. If the broker validations change to prohibit this pattern in the data, this intervening function will no longer be necessary, we can go back to selecting * from the broker's File B data. Args: submission_attributes: submission object currently being loaded db_cursor: db connection info """ submission_id = submission_attributes.broker_submission_id # does this file B have the dupe object class edge case? check_dupe_oc = ( 'SELECT count(*) ' 'FROM certified_object_class_program_activity ' 'WHERE submission_id = %s ' 'AND length(object_class) = 4 ' 'GROUP BY tas_id, program_activity_code, object_class ' 'HAVING COUNT(*) > 1' ) db_cursor.execute(check_dupe_oc, [submission_id]) dupe_oc_count = len(dictfetchall(db_cursor)) if dupe_oc_count == 0: # there are no object class duplicates, so proceed as usual db_cursor.execute('SELECT * FROM certified_object_class_program_activity WHERE submission_id = %s', [submission_id]) else: # file b contains at least one case of duplicate 4 digit object classes for the same program activity/tas, # so combine the records in question combine_dupe_oc = ( 'SELECT ' 'submission_id, ' 'job_id, ' 'agency_identifier, ' 'allocation_transfer_agency, ' 'availability_type_code, ' 'beginning_period_of_availa, ' 'ending_period_of_availabil, ' 'main_account_code, ' 'RIGHT(object_class, 3) AS object_class, ' 'CASE WHEN length(object_class) = 4 AND LEFT(object_class, 1) = \'1\' THEN \'d\' ' 'WHEN length(object_class) = 4 AND LEFT(object_class, 1) = \'2\' THEN \'r\' ' 'ELSE by_direct_reimbursable_fun END AS by_direct_reimbursable_fun, ' 'tas, ' 'tas_id, ' 'program_activity_code, ' 'program_activity_name, ' 'sub_account_code, ' 'SUM(deobligations_recov_by_pro_cpe) AS deobligations_recov_by_pro_cpe, ' 'SUM(gross_outlay_amount_by_pro_cpe) AS gross_outlay_amount_by_pro_cpe, ' 'SUM(gross_outlay_amount_by_pro_fyb) AS gross_outlay_amount_by_pro_fyb, ' 'SUM(gross_outlays_delivered_or_cpe) AS gross_outlays_delivered_or_cpe, ' 'SUM(gross_outlays_delivered_or_fyb) AS gross_outlays_delivered_or_fyb, ' 'SUM(gross_outlays_undelivered_cpe) AS gross_outlays_undelivered_cpe, ' 'SUM(gross_outlays_undelivered_fyb) AS gross_outlays_undelivered_fyb, ' 'SUM(obligations_delivered_orde_cpe) AS obligations_delivered_orde_cpe, ' 'SUM(obligations_delivered_orde_fyb) AS obligations_delivered_orde_fyb, ' 'SUM(obligations_incurred_by_pr_cpe) AS obligations_incurred_by_pr_cpe, ' 'SUM(obligations_undelivered_or_cpe) AS obligations_undelivered_or_cpe, ' 'SUM(obligations_undelivered_or_fyb) AS obligations_undelivered_or_fyb, ' 'SUM(ussgl480100_undelivered_or_cpe) AS ussgl480100_undelivered_or_cpe, ' 'SUM(ussgl480100_undelivered_or_fyb) AS ussgl480100_undelivered_or_fyb, ' 'SUM(ussgl480200_undelivered_or_cpe) AS 
ussgl480200_undelivered_or_cpe, ' 'SUM(ussgl480200_undelivered_or_fyb) AS ussgl480200_undelivered_or_fyb, ' 'SUM(ussgl483100_undelivered_or_cpe) AS ussgl483100_undelivered_or_cpe, ' 'SUM(ussgl483200_undelivered_or_cpe) AS ussgl483200_undelivered_or_cpe, ' 'SUM(ussgl487100_downward_adjus_cpe) AS ussgl487100_downward_adjus_cpe, ' 'SUM(ussgl487200_downward_adjus_cpe) AS ussgl487200_downward_adjus_cpe, ' 'SUM(ussgl488100_upward_adjustm_cpe) AS ussgl488100_upward_adjustm_cpe, ' 'SUM(ussgl488200_upward_adjustm_cpe) AS ussgl488200_upward_adjustm_cpe, ' 'SUM(ussgl490100_delivered_orde_cpe) AS ussgl490100_delivered_orde_cpe, ' 'SUM(ussgl490100_delivered_orde_fyb) AS ussgl490100_delivered_orde_fyb, ' 'SUM(ussgl490200_delivered_orde_cpe) AS ussgl490200_delivered_orde_cpe, ' 'SUM(ussgl490800_authority_outl_cpe) AS ussgl490800_authority_outl_cpe, ' 'SUM(ussgl490800_authority_outl_fyb) AS ussgl490800_authority_outl_fyb, ' 'SUM(ussgl493100_delivered_orde_cpe) AS ussgl493100_delivered_orde_cpe, ' 'SUM(ussgl497100_downward_adjus_cpe) AS ussgl497100_downward_adjus_cpe, ' 'SUM(ussgl497200_downward_adjus_cpe) AS ussgl497200_downward_adjus_cpe, ' 'SUM(ussgl498100_upward_adjustm_cpe) AS ussgl498100_upward_adjustm_cpe, ' 'SUM(ussgl498200_upward_adjustm_cpe) AS ussgl498200_upward_adjustm_cpe ' 'FROM certified_object_class_program_activity ' 'WHERE submission_id = %s ' 'GROUP BY ' 'submission_id, ' 'job_id, ' 'agency_identifier, ' 'allocation_transfer_agency, ' 'availability_type_code, ' 'beginning_period_of_availa, ' 'ending_period_of_availabil, ' 'main_account_code, ' 'RIGHT(object_class, 3), ' 'CASE WHEN length(object_class) = 4 AND LEFT(object_class, 1) = \'1\' THEN \'d\' ' 'WHEN length(object_class) = 4 AND LEFT(object_class, 1) = \'2\' THEN \'r\' ' 'ELSE by_direct_reimbursable_fun END, ' 'program_activity_code, ' 'program_activity_name, ' 'sub_account_code, ' 'tas, ' 'tas_id' ) logger.info( 'Found {} duplicated File B 4 digit object codes in submission {}. ' 'Aggregating financial values.'.format(dupe_oc_count, submission_id)) # we have at least one instance of duplicated 4 digit object classes so aggregate the financial values together db_cursor.execute(combine_dupe_oc, [submission_id]) data = dictfetchall(db_cursor) return data
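The duplicate check above asks the Broker to count object class groups that occur more than once. The same idea in plain Python, purely as an illustration of what dupe_oc_count represents:

from collections import Counter


def count_duplicate_object_class_groups(rows):
    """Count (tas_id, program_activity_code, object_class) groups appearing more than once."""
    groups = Counter(
        (row['tas_id'], row['program_activity_code'], row['object_class'])
        for row in rows
        if len(row['object_class']) == 4
    )
    return sum(1 for occurrences in groups.values() if occurrences > 1)


sample = [
    {'tas_id': 1, 'program_activity_code': '0001', 'object_class': '1110'},
    {'tas_id': 1, 'program_activity_code': '0001', 'object_class': '1110'},
    {'tas_id': 1, 'program_activity_code': '0001', 'object_class': '254'},
]
assert count_duplicate_object_class_groups(sample) == 1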
def gather_next_subawards(db_cursor, award_type, subaward_type, max_id, offset): """ Get next batch of subawards of the relevant type starting at a given offset """ query_columns = [ 'award.internal_id', 'award.id', 'award.report_period_mon', 'award.report_period_year', 'sub_award.duns AS duns', 'sub_award.parent_duns AS parent_duns', 'sub_award.dba_name AS dba_name', 'sub_award.principle_place_country AS principle_place_country', 'sub_award.principle_place_city AS principle_place_city', 'sub_award.principle_place_zip AS principle_place_zip', 'sub_award.principle_place_state AS principle_place_state', 'sub_award.principle_place_state_name AS principle_place_state_name', 'sub_award.principle_place_street AS principle_place_street', 'sub_award.principle_place_district AS principle_place_district', 'sub_award.top_paid_fullname_1', 'sub_award.top_paid_amount_1', 'sub_award.top_paid_fullname_2', 'sub_award.top_paid_amount_2', 'sub_award.top_paid_fullname_3', 'sub_award.top_paid_amount_3', 'sub_award.top_paid_fullname_4', 'sub_award.top_paid_amount_4', 'sub_award.top_paid_fullname_5', 'sub_award.top_paid_amount_5', ] # We need different columns depending on if it's a procurement or a grant. Setting some columns to have labels # so we can easily access them without making two different dictionaries. if award_type == 'procurement': query_columns.extend([ 'award.contract_number AS piid', 'sub_award.naics AS naics_code', 'sub_award.subcontract_num AS subaward_num', 'sub_award.subcontract_amount AS subaward_amount', 'sub_award.overall_description AS description', 'sub_award.recovery_model_q1 AS q1_flag', 'sub_award.recovery_model_q2 AS q2_flag', 'sub_award.subcontract_date AS action_date', 'sub_award.company_name AS recipient_name', 'sub_award.company_address_country AS recipient_location_country_code', 'sub_award.company_address_city AS recipient_location_city_name', 'sub_award.company_address_zip AS recipient_location_zip4', 'LEFT(sub_award.company_address_zip, 5) AS recipient_location_zip5', 'sub_award.company_address_state AS recipient_location_state_code', 'sub_award.company_address_state_name AS recipient_location_state_name', 'sub_award.company_address_street AS recipient_location_street_address', 'sub_award.company_address_district AS recipient_location_congressional_code', 'sub_award.parent_company_name AS parent_recipient_name', 'sub_award.bus_types AS bus_types', ]) _select = "SELECT {}" _from = "FROM fsrs_{} AS award JOIN fsrs_{} AS sub_award ON sub_award.parent_id = award.id" _where = "WHERE award.id > {} AND sub_award.subcontract_num IS NOT NULL" _other = "ORDER BY award.id, sub_award.id LIMIT {} OFFSET {}" query = " ".join([_select, _from, _where, _other]).format(",".join(query_columns), award_type, subaward_type, str(max_id), str(QUERY_LIMIT), str(offset)) else: # grant query_columns.extend([ 'sub_award.cfda_numbers', 'sub_award.subaward_num', 'sub_award.subaward_amount', 'sub_award.project_description AS description', 'sub_award.compensation_q1 AS q1_flag', 'sub_award.compensation_q2 AS q2_flag', 'sub_award.subaward_date AS action_date', 'sub_award.awardee_name AS recipient_name', 'sub_award.awardee_address_country AS recipient_location_country_code', 'sub_award.awardee_address_city AS recipient_location_city_name', 'sub_award.awardee_address_zip AS recipient_location_zip4', 'LEFT(sub_award.awardee_address_zip, 5) AS recipient_location_zip5', 'sub_award.awardee_address_state AS recipient_location_state_code', 'sub_award.awardee_address_state_name AS recipient_location_state_name', 
'sub_award.awardee_address_street AS recipient_location_street_address', 'sub_award.awardee_address_district AS recipient_location_congressional_code', 'UPPER(award.fain) AS fain', ]) _select = "SELECT {}" _from = "FROM fsrs_{} AS award JOIN fsrs_{} AS sub_award ON sub_award.parent_id = award.id" _where = "WHERE award.id > {} AND sub_award.subaward_num IS NOT NULL" _other = "ORDER BY award.id, sub_award.id LIMIT {} OFFSET {}" query = " ".join([_select, _from, _where, _other]).format(",".join(query_columns), award_type, subaward_type, str(max_id), str(QUERY_LIMIT), str(offset)) db_cursor.execute(query) return dictfetchall(db_cursor)
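gather_next_subawards reads one LIMIT/OFFSET window per call, so a caller is expected to keep advancing the offset until a window comes back empty. A hypothetical driver loop (assuming the same QUERY_LIMIT constant used above):

def iter_subawards(db_cursor, award_type, subaward_type, max_id, query_limit=QUERY_LIMIT):
    """Yield subaward rows window by window until the Broker returns an empty batch."""
    offset = 0
    while True:
        batch = gather_next_subawards(db_cursor, award_type, subaward_type, max_id, offset)
        if not batch:
            break
        for row in batch:
            yield row
        offset += query_limit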
def update_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000): # logger.info("Getting IDs for what's currently in the DB...") # current_ids = TransactionFPDS.objects # # if fiscal_year: # current_ids = current_ids.filter(action_date__fy=fiscal_year) # # current_ids = current_ids.values_list('detached_award_procurement_id', flat=True) query = "SELECT * FROM detached_award_procurement" arguments = [] fy_begin = '10/01/' + str(fiscal_year - 1) fy_end = '09/30/' + str(fiscal_year) if fiscal_year: if arguments: query += " AND" else: query += " WHERE" query += ' action_date::Date BETWEEN %s AND %s' arguments += [fy_begin] arguments += [fy_end] query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s' arguments += [limit, (page-1)*limit] logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3])) db_cursor.execute(query, arguments) logger.info("Running dictfetchall on db_cursor") procurement_data = dictfetchall(db_cursor) legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } legal_entity_location_value_map = { "recipient_flag": True } place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name # "city_name": "place_of_performance_locat", # location id doesn't mean it's a city. Can't use this mapping "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = { "place_of_performance_flag": True } contract_field_map = { "type": "contract_award_type", "description": "award_description" } logger.info("Getting total rows") # rows_loaded = len(current_ids) total_rows = len(procurement_data) # - rows_loaded logger.info("Processing " + str(total_rows) + " rows of procurement data") # skip_count = 0 start_time = datetime.now() for index, row in enumerate(procurement_data, 1): with db_transaction.atomic(): # if TransactionFPDS.objects.values('detached_award_procurement_id').\ # filter(detached_award_procurement_id=str(row['detached_award_procurement_id'])).first(): # skip_count += 1 # # if not (skip_count % 100): # logger.info('Skipped {} records so far'.format(str(skip_count))) if not (index % 100): logger.info('D1 File Load: Loading row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, copy(legal_entity_location_value_map) ) # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name ) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, copy(place_of_performance_value_map)) # If awarding 
toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1: awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id'] awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id] row['awarding_agency_code'] = awarding_cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1: funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"]) if funding_subtier_agency_id is not None: funding_toptier_agency_id = \ subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id'] funding_cgac_code = toptier_agency_map[funding_toptier_agency_id] else: funding_cgac_code = None row['funding_agency_code'] = funding_cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"] ) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_piid=row.get('parent_award_id')) award.save() award_update_id_list.append(award.id) award_contract_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict) transaction.save() contract_instance = load_data_into_model( TransactionFPDS(), # thrown away row, as_dict=True) transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance) # catch exception and do nothing if we see # "django.db.utils.IntegrityError: duplicate key value violates unique constraint" try: transaction_contract.save() except IntegrityError: pass
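Both the FPDS and FABS loaders fall back to the subtier agency code when a record arrives without a toptier (CGAC) code. The lookup chain through the three preloaded maps, shown as a hypothetical helper:

def resolve_cgac(agency_code, sub_tier_code, subtier_agency_map, subtier_to_agency_map, toptier_agency_map):
    """Return the supplied CGAC code, or derive it from the subtier code; None if neither resolves."""
    if agency_code and agency_code.strip():
        return agency_code
    subtier_agency_id = subtier_agency_map.get(sub_tier_code)
    if subtier_agency_id is None:
        return None
    toptier_agency_id = subtier_to_agency_map[subtier_agency_id]['toptier_agency_id']
    return toptier_agency_map[toptier_agency_id]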
def get_fpds_data(date):
    if not hasattr(date, 'month'):
        date = datetime.strptime(date, '%Y-%m-%d').date()

    db_cursor = connections['data_broker'].cursor()

    # The ORDER BY is important here because deletions must happen in a specific order and that order is defined
    # by the Broker's PK since every modification is a new row
    db_query = 'SELECT * ' \
               'FROM detached_award_procurement ' \
               'WHERE updated_at >= %s ' \
               'ORDER BY detached_award_procurement_id'
    db_args = [date]

    db_cursor.execute(db_query, db_args)
    db_rows = dictfetchall(db_cursor)  # a list of dicts, one per Broker row

    ids_to_delete = []

    if settings.IS_LOCAL:
        for file in os.listdir(settings.CSV_LOCAL_PATH):
            if re.search('.*_delete_records_(IDV|award).*', file) and \
                    datetime.strptime(file[:file.find('_')], '%m-%d-%Y').date() >= date:
                with open(settings.CSV_LOCAL_PATH + file, 'r') as current_file:
                    # open file, split string to array, skip the header
                    reader = csv.reader(current_file.read().splitlines())
                    next(reader)
                    unique_key_list = [rows[0] for rows in reader]

                    ids_to_delete += unique_key_list
    else:
        # Connect to AWS; both environment variables are required
        aws_region = os.environ.get('AWS_REGION')
        fpds_bucket_name = os.environ.get('FPDS_BUCKET_NAME')

        if not (aws_region and fpds_bucket_name):
            raise Exception('Missing required environment variables: AWS_REGION, FPDS_BUCKET_NAME')

        s3client = boto3.client('s3', region_name=aws_region)
        s3resource = boto3.resource('s3', region_name=aws_region)
        s3_bucket = s3resource.Bucket(fpds_bucket_name)

        # make an array of all the keys in the bucket
        file_list = [item.key for item in s3_bucket.objects.all()]

        # Only use files that match the date we're currently checking
        for item in file_list:
            # if the date on the file is the same day as we're checking
            if re.search('.*_delete_records_(IDV|award).*', item) and '/' not in item and \
                    datetime.strptime(item[:item.find('_')], '%m-%d-%Y').date() >= date:
                # make the url params to pass
                url_params = {'Bucket': fpds_bucket_name, 'Key': item}

                # get the url for the current file
                file_path = s3client.generate_presigned_url('get_object', Params=url_params)
                current_file = urllib.request.urlopen(file_path)
                reader = csv.reader(current_file.read().decode("utf-8").splitlines())

                # skip the header, the reader doesn't ignore it for some reason
                next(reader)
                # make an array of all the detached_award_procurement_ids
                unique_key_list = [rows[0] for rows in reader]

                ids_to_delete += unique_key_list

    logger.info('Number of records to insert/update: %s' % str(len(db_rows)))
    logger.info('Number of records to delete: %s' % str(len(ids_to_delete)))

    return db_rows, ids_to_delete
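The delete-record files above are selected purely by filename: the MM-DD-YYYY prefix must be on or after the cutoff date and the name must contain `_delete_records_` for IDV or award files. That filter in isolation (function name hypothetical):

import re
from datetime import datetime


def is_applicable_delete_file(file_name, cutoff_date):
    """True when a delete-records CSV filename applies to the date being processed."""
    if not re.search(r'.*_delete_records_(IDV|award).*', file_name):
        return False
    file_date = datetime.strptime(file_name[:file_name.find('_')], '%m-%d-%Y').date()
    return file_date >= cutoff_date


assert is_applicable_delete_file('01-15-2018_delete_records_award.csv', datetime(2018, 1, 1).date())
assert not is_applicable_delete_file('12-15-2017_delete_records_IDV.csv', datetime(2018, 1, 1).date())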
def load_subawards(submission_attributes, awards_touched, db_cursor): """ Loads File F from the broker. db_cursor should be the db_cursor for Broker """ # A list of award id's to update the subaward accounts and totals on award_ids_to_update = set() # Get a list of PIIDs from this submission # TODO: URIS awards_touched = [Award.objects.filter(id=award_id).first() for award_id in awards_touched] piids = list([award.piid for award in awards_touched if award.piid]) fains = list([award.fain for award in awards_touched if award.fain]) uris = list([award.uri for award in awards_touched if award.uri]) # This allows us to handle an empty list in the SQL without changing the query piids.append(None) fains.append(None) # D1 File F db_cursor.execute(D1_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(piids)]) d1_f_data = dictfetchall(db_cursor) logger.info("Creating D1 F File Entries (Subcontracts): {}".format(len(d1_f_data))) d1_create_count = 0 d1_update_count = 0 d1_empty_count = 0 for row in d1_f_data: if row['subcontract_num'] is None: if row['id'] is not None and row['subcontract_amount'] is not None: logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"])) logger.warn("Failing row: {}".format(row)) else: d1_empty_count += 1 continue # Get the agency agency = get_valid_awarding_agency(row) if not agency: logger.warn( "Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format( row['subcontract_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c'])) continue # Find the award to attach this sub-contract to # We perform this lookup by finding the Award containing a transaction with a matching parent award id, piid, # and submission attributes award = Award.objects.filter( awarding_agency=agency, latest_transaction__contract_data__piid=row['piid'], latest_transaction__contract_data__parent_award_id=row['parent_award_id']).distinct().order_by( "-date_signed").first() # We don't have a matching award for this subcontract, log a warning and continue to the next row if not award: logger.warn( "Subcontract number {} cannot find matching award with piid {}, parent_award_id {}; skipping...".format( row['subcontract_num'], row['piid'], row['parent_award_id'])) continue award_ids_to_update.add(award.id) # Get or create unique DUNS-recipient pair recipient, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['duns'], recipient_name=row['company_name'] ) if created: recipient.parent_recipient_unique_id = row['parent_duns'] recipient.location = get_or_create_location(row, location_d1_recipient_mapper) recipient.save() # Get or create POP place_of_performance = get_or_create_location(row, pop_mapper) d1_f_dict = { 'award': award, 'recipient': recipient, 'data_source': "DBR", 'cfda': None, 'awarding_agency': award.awarding_agency, 'funding_agency': award.funding_agency, 'place_of_performance': place_of_performance, 'subaward_number': row['subcontract_num'], 'amount': row['subcontract_amount'], 'description': row['overall_description'], 'recovery_model_question1': row['recovery_model_q1'], 'recovery_model_question2': row['recovery_model_q2'], 'action_date': row['subcontract_date'], 'award_report_fy_month': row['report_period_mon'], 'award_report_fy_year': row['report_period_year'] } # Create the subaward subaward, created = Subaward.objects.update_or_create(subaward_number=row['subcontract_num'], award=award, defaults=d1_f_dict) if created: d1_create_count += 1 else: d1_update_count += 1 # D2 File F 
db_cursor.execute(D2_FILE_F_QUERY, [tuple(fains), tuple(uris)]) d2_f_data = dictfetchall(db_cursor) logger.info("Creating D2 F File Entries (Subawards): {}".format(len(d2_f_data))) d2_create_count = 0 d2_update_count = 0 d2_empty_count = 0 for row in d2_f_data: if row['subaward_num'] is None: if row['id'] is not None and row['subaward_amount'] is not None: logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"])) logger.warn("Failing row: {}".format(row)) else: d2_empty_count += 1 continue agency = get_valid_awarding_agency(row) if not agency: logger.warn("Subaward number {} cannot find matching agency with toptier code {} and subtier " "code {}".format(row['subaward_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c'])) continue # Find the award to attach this sub-award to # We perform this lookup by finding the Award containing a transaction with a matching fain and submission. # If this fails, try submission and uri if row['fain'] and len(row['fain']) > 0: award = Award.objects.filter(awarding_agency=agency, latest_transaction__assistance_data__fain=row['fain']).\ distinct().order_by("-date_signed").first() # Couldn't find a match on FAIN, try URI if it exists if not award and row['uri'] and len(row['uri']) > 0: award = Award.objects.filter(awarding_agency=agency, latest_transaction__assistance_data__uri=row['uri']).distinct().first() # Try both if not award and row['fain'] and len(row['fain']) > 0 and row['uri'] and len(row['uri']) > 0: award = Award.objects.filter(awarding_agency=agency, latest_transaction__assistance_data__fain=row['fain'], latest_transaction__assistance_data__uri=row['uri']).\ distinct().order_by("-date_signed").first() # We don't have a matching award for this subcontract, log a warning and continue to the next row if not award: logger.warn("Subaward number {} cannot find matching award with fain {}, uri {}; " "skipping...".format(row['subaward_num'], row['fain'], row['uri'])) continue award_ids_to_update.add(award.id) recipient_name = row['awardee_name'] if recipient_name is None: recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" # Get or create unique DUNS-recipient pair recipient, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['duns'], recipient_name=recipient_name ) if created: recipient.parent_recipient_unique_id = row['parent_duns'] recipient.location = get_or_create_location(row, location_d2_recipient_mapper) recipient.save() # Get or create POP place_of_performance = get_or_create_location(row, pop_mapper) # Get CFDA Program cfda = Cfda.objects.filter(program_number=row['cfda_number']).first() d2_f_dict = { 'award': award, 'recipient': recipient, 'data_source': "DBR", 'cfda': cfda, 'awarding_agency': award.awarding_agency, 'funding_agency': award.funding_agency, 'place_of_performance': place_of_performance, 'subaward_number': row['subaward_num'], 'amount': row['subaward_amount'], 'description': row['project_description'], 'recovery_model_question1': row['compensation_q1'], 'recovery_model_question2': row['compensation_q2'], 'action_date': row['subaward_date'], 'award_report_fy_month': row['report_period_mon'], 'award_report_fy_year': row['report_period_year'] } # Create the subaward subaward, created = Subaward.objects.update_or_create( subaward_number=row['subaward_num'], award=award, defaults=d2_f_dict ) if created: d2_create_count += 1 else: d2_update_count += 1 # Update Award objects with subaward aggregates 
    update_award_subawards(tuple(award_ids_to_update))

    logger.info(
        """Submission {}
        Subcontracts created: {}
        Subcontracts updated: {}
        Empty subcontract rows: {}
        Subawards created: {}
        Subawards updated: {}
        Empty subaward rows: {}""".format(submission_attributes.broker_submission_id,
                                          d1_create_count, d1_update_count, d1_empty_count,
                                          d2_create_count, d2_update_count, d2_empty_count))
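The D2 matching above tries FAIN first, then URI, then both together, always preferring the most recently signed award. As a self-contained sketch against the same Award model (helper name hypothetical):

def find_d2_award(agency, fain, uri):
    """Locate the award a File F grant subaward belongs to, mirroring the fallback order above."""
    award = None
    if fain:
        award = (Award.objects
                 .filter(awarding_agency=agency, latest_transaction__assistance_data__fain=fain)
                 .distinct().order_by('-date_signed').first())
    if not award and uri:
        award = (Award.objects
                 .filter(awarding_agency=agency, latest_transaction__assistance_data__uri=uri)
                 .distinct().first())
    if not award and fain and uri:
        award = (Award.objects
                 .filter(awarding_agency=agency,
                         latest_transaction__assistance_data__fain=fain,
                         latest_transaction__assistance_data__uri=uri)
                 .distinct().order_by('-date_signed').first())
    return award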