def get_or_create_award(self, row, awarding_agency): fain = row.get("federal_award_id", None) uri = row.get("unique_transaction_id", None) # ask: why unique_transaction_id instead of uri? created, award = Award.get_or_create_summary_award( fain=fain, uri=uri, awarding_agency=awarding_agency) return award
def get_or_create_award(self, row, awarding_agency_id): piid = row.get("piid", None) parent_award_id = row.get("idvpiid", None) awarding_agency = Agency.objects.get(id=awarding_agency_id) award = Award.get_or_create_summary_award( piid=piid, fain=None, uri=None, awarding_agency=awarding_agency, parent_award_id=parent_award_id) return award
def load_parent_awards(self, fpds_broker_data, total_rows): start_time = datetime.now() for index, row in enumerate(fpds_broker_data, 1): if not (index % 10000): logger.info('Parent Awards: Loading row {} of {} ({})'.format( str(index), str(total_rows), datetime.now() - start_time)) # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len( row['awarding_agency_code'].strip()) < 1: awarding_subtier_agency_id = self.subtier_agency_map[ row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = self.subtier_to_agency_map[ awarding_subtier_agency_id]['toptier_agency_id'] awarding_cgac_code = self.toptier_agency_map[ awarding_toptier_agency_id] row['awarding_agency_code'] = awarding_cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = self.agency_no_sub_map.get( (row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])) if awarding_agency is None: awarding_agency = self.agency_sub_only_map.get( row['awarding_agency_code']) # parent_award_id from the row = parent piid parent_award_piid = row.get('parent_award_id') parent_award = None if parent_award_piid: parent_award = self.award_map.get(parent_award_piid) if not parent_award: create_kwargs = { 'awarding_agency': awarding_agency, 'piid': parent_award_piid } parent_award = Award(**create_kwargs) self.award_map[parent_award_piid] = parent_award parent_award_bulk.append(parent_award) parent_award_lookup.append(parent_award) logger.info('Bulk creating Parent Awards (batch_size: {})...'.format( BATCH_SIZE)) Award.objects.bulk_create(parent_award_bulk, batch_size=BATCH_SIZE)
def insert_new_fpds(self, to_insert, total_rows): place_of_performance_field_map = { "location_country_code": "place_of_perform_country_c", "country_name": "place_of_perf_country_desc", "state_code": "place_of_performance_state", "state_name": "place_of_perfor_state_desc", "city_name": "place_of_perform_city_name", "county_name": "place_of_perform_county_na", "county_code": "place_of_perform_county_co", "zip_4a": "place_of_performance_zip4a", "congressional_code": "place_of_performance_congr", "zip_last4": "place_of_perform_zip_last4", "zip5": "place_of_performance_zip5", } legal_entity_location_field_map = { "location_country_code": "legal_entity_country_code", "country_name": "legal_entity_country_name", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_descrip", "city_name": "legal_entity_city_name", "county_name": "legal_entity_county_name", "county_code": "legal_entity_county_code", "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "zip4": "legal_entity_zip4", "congressional_code": "legal_entity_congressional", "zip_last4": "legal_entity_zip_last4", "zip5": "legal_entity_zip5", } fpds_normalized_field_map = { "type": "contract_award_type", "description": "award_description" } fpds_field_map = { "officer_1_name": "high_comp_officer1_full_na", "officer_1_amount": "high_comp_officer1_amount", "officer_2_name": "high_comp_officer2_full_na", "officer_2_amount": "high_comp_officer2_amount", "officer_3_name": "high_comp_officer3_full_na", "officer_3_amount": "high_comp_officer3_amount", "officer_4_name": "high_comp_officer4_full_na", "officer_4_amount": "high_comp_officer4_amount", "officer_5_name": "high_comp_officer5_full_na", "officer_5_amount": "high_comp_officer5_amount" } for index, row in enumerate(to_insert, 1): upper_case_dict_values(row) # Create new LegalEntityLocation and LegalEntity from the row data legal_entity_location = create_location( legal_entity_location_field_map, row, {"recipient_flag": True, "is_fpds": True} ) recipient_name = row["awardee_or_recipient_legal"] legal_entity = LegalEntity.objects.create( recipient_unique_id=row["awardee_or_recipient_uniqu"], recipient_name=recipient_name if recipient_name is not None else "", ) legal_entity_value_map = { "location": legal_entity_location, "business_categories": get_business_categories(row=row, data_type="fpds"), "is_fpds": True, } set_legal_entity_boolean_fields(row) legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True}) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"]) # Create the summary Award (created, award) = Award.get_or_create_summary_award( generated_unique_award_id=row["unique_award_key"], piid=row["piid"], ) award.parent_award_piid = row.get("parent_award_id") award.save() # Append row to list of Awards updated AWARD_UPDATE_ID_LIST.append(award.id) if row["last_modified"] and len(str(row["last_modified"])) == len("YYYY-MM-DD HH:MM:SS"): # 19 characters dt_fmt = "%Y-%m-%d %H:%M:%S" else: dt_fmt = "%Y-%m-%d %H:%M:%S.%f" # try using this even if last_modified isn't a valid string try: last_mod_date = datetime.strptime(str(row["last_modified"]), dt_fmt).date() except ValueError: # handle odd-string formats and NULLs from the upstream FPDS-NG system info_message = "Invalid value '{}' does not match: '{}'".format(row["last_modified"], dt_fmt) logger.info(info_message) last_mod_date = None award_type, award_type_desc = award_types(row) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row["period_of_performance_star"]), "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]), "action_date": format_date(row["action_date"]), "last_modified_date": last_mod_date, "transaction_unique_id": row["detached_award_proc_unique"], "is_fpds": True, "type": award_type, "type_description": award_type_desc, } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fpds_normalized_field_map, value_map=parent_txn_value_map, as_dict=True, ) contract_instance = load_data_into_model( TransactionFPDS(), # thrown away row, field_map=fpds_field_map, as_dict=True ) detached_award_proc_unique = contract_instance["detached_award_proc_unique"] unique_fpds = TransactionFPDS.objects.filter(detached_award_proc_unique=detached_award_proc_unique) if unique_fpds.first(): transaction_normalized_dict["update_date"] = datetime.now(timezone.utc) transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"]) # update TransactionNormalized TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).update( **transaction_normalized_dict ) # update TransactionFPDS unique_fpds.update(**contract_instance) else: # create TransactionNormalized transaction = TransactionNormalized(**transaction_normalized_dict) transaction.save() # create TransactionFPDS transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance) transaction_fpds.save() # Update legal entity to map back to transaction legal_entity.transaction_unique_id = detached_award_proc_unique legal_entity.save()
def load_file_d1(submission_attributes, procurement_data, db_cursor, quick=False): """ Process and load file D1 broker data (contract award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name "city_name": "place_of_performance_locat", "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = { "place_of_performance_flag": True } legal_entity_location_value_map = { "recipient_flag": True } contract_field_map = { "type": "contract_award_type", "description": "award_description" } d_start_time = time.time() if quick: parameters = {'broker_submission_id': submission_attributes.broker_submission_id} run_sql_file('usaspending_api/etl/management/load_file_d1.sql', parameters) logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time)) return total_rows = len(procurement_data) start_time = datetime.now() for index, row in enumerate(procurement_data, 1): if not (index % 100): logger.info('D1 File Load: Loading row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, copy(legal_entity_location_value_map) ) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name ) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, copy(place_of_performance_value_map)) # If awarding toptier agency code (aka CGAC) is not supplied on the D1 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1: funding_agency = Agency.get_by_subtier(row["funding_sub_tier_agency_co"]) row['funding_agency_code'] = ( funding_agency.toptier_agency.cgac_code if funding_agency is not None else None) # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"] ) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_piid=row.get('parent_award_id')) # It is a FAIN/PIID/URI, not our db's pk award.save() award_update_id_list.append(award.id) award_contract_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict) transaction.save() contract_value_map = { 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e']) } contract_instance = load_data_into_model( TransactionFPDS(), # thrown away row, field_map=contract_field_map, value_map=contract_value_map, as_dict=True) transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance) transaction_contract.save() logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))
def insert_new_fpds(self, to_insert, total_rows): place_of_performance_field_map = { "location_country_code": "place_of_perform_country_c", "country_name": "place_of_perf_country_desc", "state_code": "place_of_performance_state", "state_name": "place_of_perfor_state_desc", "city_name": "place_of_perform_city_name", "county_name": "place_of_perform_county_na", "county_code": "place_of_perform_county_co", "zip_4a": "place_of_performance_zip4a", "congressional_code": "place_of_performance_congr", "zip_last4": "place_of_perform_zip_last4", "zip5": "place_of_performance_zip5", } legal_entity_location_field_map = { "location_country_code": "legal_entity_country_code", "country_name": "legal_entity_country_name", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_descrip", "city_name": "legal_entity_city_name", "county_name": "legal_entity_county_name", "county_code": "legal_entity_county_code", "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "zip4": "legal_entity_zip4", "congressional_code": "legal_entity_congressional", "zip_last4": "legal_entity_zip_last4", "zip5": "legal_entity_zip5", } for index, row in enumerate(to_insert, 1): upper_case_dict_values(row) # Create new LegalEntityLocation and LegalEntity from the row data legal_entity_location = create_location( legal_entity_location_field_map, row, {"recipient_flag": True, "is_fpds": True} ) recipient_name = row["awardee_or_recipient_legal"] legal_entity = LegalEntity.objects.create( recipient_unique_id=row["awardee_or_recipient_uniqu"], recipient_name=recipient_name if recipient_name is not None else "", ) legal_entity_value_map = { "location": legal_entity_location, "business_categories": get_business_categories(row=row, data_type="fpds"), "is_fpds": True, } set_legal_entity_boolean_fields(row) legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True}) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"]) # Generate the unique Award ID # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id generated_unique_id = ( "CONT_AW_" + (row["agency_id"] if row["agency_id"] else "-NONE-") + "_" + (row["referenced_idv_agency_iden"] if row["referenced_idv_agency_iden"] else "-NONE-") + "_" + (row["piid"] if row["piid"] else "-NONE-") + "_" + (row["parent_award_id"] if row["parent_award_id"] else "-NONE-") ) # Create the summary Award (created, award) = Award.get_or_create_summary_award( generated_unique_award_id=generated_unique_id, piid=row["piid"] ) award.parent_award_piid = row.get("parent_award_id") award.save() # Append row to list of Awards updated AWARD_UPDATE_ID_LIST.append(award.id) if row["last_modified"] and len(str(row["last_modified"])) == len("YYYY-MM-DD HH:MM:SS"): # 19 characters dt_fmt = "%Y-%m-%d %H:%M:%S" else: dt_fmt = "%Y-%m-%d %H:%M:%S.%f" # try using this even if last_modified isn't a valid string try: last_mod_date = datetime.strptime(str(row["last_modified"]), dt_fmt).date() except ValueError: # handle odd-string formats and NULLs from the upstream FPDS-NG system info_message = "Invalid value '{}' does not match: '{}'".format(row["last_modified"], dt_fmt) logger.info(info_message) last_mod_date = None award_type, award_type_desc = award_types(row) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row["period_of_performance_star"]), "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]), "action_date": format_date(row["action_date"]), "last_modified_date": last_mod_date, "transaction_unique_id": row["detached_award_proc_unique"], "generated_unique_award_id": generated_unique_id, "is_fpds": True, "type": award_type, "type_description": award_type_desc, } contract_field_map = {"description": "award_description"} transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True, ) contract_instance = load_data_into_model(TransactionFPDS(), row, as_dict=True) # thrown away detached_award_proc_unique = contract_instance["detached_award_proc_unique"] unique_fpds = TransactionFPDS.objects.filter(detached_award_proc_unique=detached_award_proc_unique) if unique_fpds.first(): transaction_normalized_dict["update_date"] = datetime.now(timezone.utc) transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"]) # update TransactionNormalized TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).update( **transaction_normalized_dict ) # update TransactionFPDS unique_fpds.update(**contract_instance) else: # create TransactionNormalized transaction = TransactionNormalized(**transaction_normalized_dict) transaction.save() # create TransactionFPDS transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance) transaction_fpds.save() # Update legal entity to map back to transaction legal_entity.transaction_unique_id = detached_award_proc_unique legal_entity.save()
def load_file_d2(submission_attributes, award_financial_assistance_data, db_cursor): """ Process and load file D2 broker data (financial assistance award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "city_code": "legal_entity_city_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "county_code": "legal_entity_county_code", "county_name": "legal_entity_county_name", "foreign_city_name": "legal_entity_foreign_city", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "zip5": "legal_entity_zip5", "zip_last4": "legal_entity_zip_last4", "location_country_code": "legal_entity_country_code" } place_of_performance_field_map = { "city_name": "place_of_performance_city", "performance_code": "place_of_performance_code", "congressional_code": "place_of_performance_congr", "county_name": "place_of_perform_county_na", "foreign_location_description": "place_of_performance_forei", "state_name": "place_of_perform_state_nam", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } legal_entity_location_value_map = {"recipient_flag": True} place_of_performance_value_map = {"place_of_performance_flag": True} fad_field_map = { "type": "assistance_type", "description": "award_description", } for row in award_financial_assistance_data: legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map) # Create the legal entity if it doesn't exist try: legal_entity = LegalEntity.objects.get( recipient_unique_id=row['awardee_or_recipient_uniqu']) except ObjectDoesNotExist: legal_entity_value_map = { "location": legal_entity_location, "legal_entity_id": row['awardee_or_recipient_uniqu'] } legal_entity = load_data_into_model( LegalEntity(), row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map) # If toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up if row['awarding_agency_code'] is None: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code if row['funding_agency_code'] is None: row['funding_agency_code'] = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_id=row.get('parent_award_id')) award.save() AWARD_UPDATE_ID_LIST.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_instance = load_data_into_model( Transaction(), row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) transaction_instance, created = Transaction.objects.get_or_create( **transaction_instance) fad_value_map = { "transaction": transaction_instance, "submission": submission_attributes, "cfda": CFDAProgram.objects.filter( program_number=row['cfda_number']).first(), 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), } financial_assistance_data = load_data_into_model( TransactionAssistance(), row, field_map=fad_field_map, value_map=fad_value_map, save=True)
def load_file_c(submission_attributes, award_financial_data, db_cursor): """ Process and load file C broker data. Note: this should run AFTER the D1 and D2 files are loaded because we try to join to those records to retrieve some additional information about the awarding sub-tier agency. """ # this matches the file b reverse directive, but am repeating it here # to ensure that we don't overwrite it as we change up the order of # file loading reverse = re.compile(r'(_(cpe|fyb)$)|^transaction_obligated_amount$') for row in award_financial_data: # Check and see if there is an entry for this TAS treasury_account = get_treasury_appropriation_account_tas_lookup( row.get('tas_id'), db_cursor) if treasury_account is None: raise Exception('Could not find appropriation account for TAS: ' + row['tas']) # Find a matching transaction record, so we can use its # subtier agency information to match to (or create) an Award record awarding_cgac = row.get('agency_identifier') # cgac from record's TAS txn = get_award_financial_transaction( awarding_cgac, piid=row.get('piid'), parent_award_id=row.get('parent_award_id'), fain=row.get('fain'), uri=row.get('uri')) if txn is not None: # We found a matching transaction, so grab its awarding agency # info and pass it get_or_create_summary_award awarding_agency = txn.awarding_agency else: # No matching transaction found, so find/create Award by using # topiter agency only, since CGAC code is the only piece of # awarding agency info that we have. awarding_agency = Agency.get_by_toptier(awarding_cgac) # Find the award that this award transaction belongs to. If it doesn't exist, create it. created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_id=row.get('parent_award_id'), use_cache=False) award_financial_data = FinancialAccountsByAwards() value_map = { 'award': award, 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, 'treasury_account': treasury_account, 'object_class': get_or_create_object_class(row['object_class'], row['by_direct_reimbursable_fun'], logger), 'program_activity': get_or_create_program_activity(row, submission_attributes) } # Still using the cpe|fyb regex compiled above for reverse afd = load_data_into_model(award_financial_data, row, value_map=value_map, save=True, reverse=reverse) awards_cache.clear()
def update_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000): # logger.info("Getting IDs for what's currently in the DB...") # current_ids = TransactionFPDS.objects # # if fiscal_year: # current_ids = current_ids.filter(action_date__fy=fiscal_year) # # current_ids = current_ids.values_list('detached_award_procurement_id', flat=True) query = "SELECT * FROM detached_award_procurement" arguments = [] fy_begin = '10/01/' + str(fiscal_year - 1) fy_end = '09/30/' + str(fiscal_year) if fiscal_year: if arguments: query += " AND" else: query += " WHERE" query += ' action_date::Date BETWEEN %s AND %s' arguments += [fy_begin] arguments += [fy_end] query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s' arguments += [limit, (page-1)*limit] logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3])) db_cursor.execute(query, arguments) logger.info("Running dictfetchall on db_cursor") procurement_data = dictfetchall(db_cursor) legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } legal_entity_location_value_map = { "recipient_flag": True } place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name # "city_name": "place_of_performance_locat", # location id doesn't mean it's a city. Can't use this mapping "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = { "place_of_performance_flag": True } contract_field_map = { "type": "contract_award_type", "description": "award_description" } logger.info("Getting total rows") # rows_loaded = len(current_ids) total_rows = len(procurement_data) # - rows_loaded logger.info("Processing " + str(total_rows) + " rows of procurement data") # skip_count = 0 start_time = datetime.now() for index, row in enumerate(procurement_data, 1): with db_transaction.atomic(): # if TransactionFPDS.objects.values('detached_award_procurement_id').\ # filter(detached_award_procurement_id=str(row['detached_award_procurement_id'])).first(): # skip_count += 1 # # if not (skip_count % 100): # logger.info('Skipped {} records so far'.format(str(skip_count))) if not (index % 100): logger.info('D1 File Load: Loading row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, copy(legal_entity_location_value_map) ) # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name ) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, copy(place_of_performance_value_map)) # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1: awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id'] awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id] row['awarding_agency_code'] = awarding_cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1: funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"]) if funding_subtier_agency_id is not None: funding_toptier_agency_id = \ subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id'] funding_cgac_code = toptier_agency_map[funding_toptier_agency_id] else: funding_cgac_code = None row['funding_agency_code'] = funding_cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"] ) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_piid=row.get('parent_award_id')) award.save() award_update_id_list.append(award.id) award_contract_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict) transaction.save() contract_instance = load_data_into_model( TransactionFPDS(), # thrown away row, as_dict=True) transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance) # catch exception and do nothing if we see # "django.db.utils.IntegrityError: duplicate key value violates unique constraint" try: transaction_contract.save() except IntegrityError: pass
def update_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000): # logger.info("Getting IDs for what's currently in the DB...") # current_ids = TransactionFPDS.objects # # if fiscal_year: # current_ids = current_ids.filter(action_date__fy=fiscal_year) # # current_ids = current_ids.values_list('detached_award_procurement_id', flat=True) query = "SELECT * FROM detached_award_procurement" arguments = [] fy_begin = '10/01/' + str(fiscal_year - 1) fy_end = '09/30/' + str(fiscal_year) if fiscal_year: if arguments: query += " AND" else: query += " WHERE" query += ' action_date::Date BETWEEN %s AND %s' arguments += [fy_begin] arguments += [fy_end] query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s' arguments += [limit, (page - 1) * limit] logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3])) db_cursor.execute(query, arguments) logger.info("Running dictfetchall on db_cursor") procurement_data = dictfetchall(db_cursor) legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } legal_entity_location_value_map = {"recipient_flag": True} place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name # "city_name": "place_of_performance_locat", # location id doesn't mean it's a city. Can't use this mapping "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = {"place_of_performance_flag": True} contract_field_map = { "type": "contract_award_type", "description": "award_description" } logger.info("Getting total rows") # rows_loaded = len(current_ids) total_rows = len(procurement_data) # - rows_loaded logger.info("Processing " + str(total_rows) + " rows of procurement data") # skip_count = 0 start_time = datetime.now() for index, row in enumerate(procurement_data, 1): with db_transaction.atomic(): # if TransactionFPDS.objects.values('detached_award_procurement_id').\ # filter(detached_award_procurement_id=str(row['detached_award_procurement_id'])).first(): # skip_count += 1 # # if not (skip_count % 100): # logger.info('Skipped {} records so far'.format(str(skip_count))) if not (index % 100): logger.info( 'D1 File Load: Loading row {} of {} ({})'.format( str(index), str(total_rows), datetime.now() - start_time)) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, copy(legal_entity_location_value_map)) # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, copy(place_of_performance_value_map)) # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len( row['awarding_agency_code'].strip()) < 1: awarding_subtier_agency_id = subtier_agency_map[ row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = subtier_to_agency_map[ awarding_subtier_agency_id]['toptier_agency_id'] awarding_cgac_code = toptier_agency_map[ awarding_toptier_agency_id] row['awarding_agency_code'] = awarding_cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len( row['funding_agency_code'].strip()) < 1: funding_subtier_agency_id = subtier_agency_map.get( row["funding_sub_tier_agency_co"]) if funding_subtier_agency_id is not None: funding_toptier_agency_id = \ subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id'] funding_cgac_code = toptier_agency_map[ funding_toptier_agency_id] else: funding_cgac_code = None row['funding_agency_code'] = funding_cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_piid=row.get('parent_award_id')) award.save() award_update_id_list.append(award.id) award_contract_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier( row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction( **transaction_dict) transaction.save() contract_instance = load_data_into_model( TransactionFPDS(), # thrown away row, as_dict=True) transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance) # catch exception and do nothing if we see # "django.db.utils.IntegrityError: duplicate key value violates unique constraint" try: transaction_contract.save() except IntegrityError: pass
def insert_new_fpds(self, to_insert, total_rows): place_of_performance_field_map = { "location_country_code": "place_of_perform_country_c", "country_name": "place_of_perf_country_desc", "state_code": "place_of_performance_state", "state_name": "place_of_perfor_state_desc", "city_name": "place_of_perform_city_name", "county_name": "place_of_perform_county_na", "county_code": "place_of_perform_county_co", "zip_4a": "place_of_performance_zip4a", "congressional_code": "place_of_performance_congr", "zip_last4": "place_of_perform_zip_last4", "zip5": "place_of_performance_zip5", } legal_entity_location_field_map = { "location_country_code": "legal_entity_country_code", "country_name": "legal_entity_country_name", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_descrip", "city_name": "legal_entity_city_name", "county_name": "legal_entity_county_name", "county_code": "legal_entity_county_code", "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "zip4": "legal_entity_zip4", "congressional_code": "legal_entity_congressional", "zip_last4": "legal_entity_zip_last4", "zip5": "legal_entity_zip5", } for index, row in enumerate(to_insert, 1): upper_case_dict_values(row) # Create new LegalEntityLocation and LegalEntity from the row data legal_entity_location = create_location( legal_entity_location_field_map, row, { "recipient_flag": True, "is_fpds": True }) recipient_name = row["awardee_or_recipient_legal"] legal_entity = LegalEntity.objects.create( recipient_unique_id=row["awardee_or_recipient_uniqu"], recipient_name=recipient_name if recipient_name is not None else "", ) legal_entity_value_map = { "location": legal_entity_location, "business_categories": get_business_categories(row=row, data_type="fpds"), "is_fpds": True, } set_legal_entity_boolean_fields(row) legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True}) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only( row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only( row["funding_sub_tier_agency_co"]) # Generate the unique Award ID # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id generated_unique_id = ( "CONT_AW_" + (row["agency_id"] if row["agency_id"] else "-NONE-") + "_" + (row["referenced_idv_agency_iden"] if row["referenced_idv_agency_iden"] else "-NONE-") + "_" + (row["piid"] if row["piid"] else "-NONE-") + "_" + (row["parent_award_id"] if row["parent_award_id"] else "-NONE-")) # Create the summary Award (created, award) = Award.get_or_create_summary_award( generated_unique_award_id=generated_unique_id, piid=row["piid"]) award.parent_award_piid = row.get("parent_award_id") award.save() # Append row to list of Awards updated AWARD_UPDATE_ID_LIST.append(award.id) try: last_mod_date = datetime.strptime(str( row["last_modified"]), "%Y-%m-%d %H:%M:%S.%f").date() except ValueError: last_mod_date = datetime.strptime(str(row["last_modified"]), "%Y-%m-%d %H:%M:%S").date() parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row["period_of_performance_star"]), "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]), "action_date": format_date(row["action_date"]), "last_modified_date": last_mod_date, "transaction_unique_id": row["detached_award_proc_unique"], "generated_unique_award_id": generated_unique_id, "is_fpds": True, } contract_field_map = { "type": "contract_award_type", "type_description": "contract_award_type_desc", "description": "award_description", } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True, ) contract_instance = load_data_into_model( TransactionFPDS(), row, as_dict=True) # thrown away detached_award_proc_unique = contract_instance[ "detached_award_proc_unique"] unique_fpds = TransactionFPDS.objects.filter( detached_award_proc_unique=detached_award_proc_unique) if unique_fpds.first(): transaction_normalized_dict["update_date"] = datetime.now( timezone.utc) transaction_normalized_dict["fiscal_year"] = fy( transaction_normalized_dict["action_date"]) # update TransactionNormalized TransactionNormalized.objects.filter( id=unique_fpds.first().transaction.id).update( **transaction_normalized_dict) # update TransactionFPDS unique_fpds.update(**contract_instance) else: # create TransactionNormalized transaction = TransactionNormalized( **transaction_normalized_dict) transaction.save() # create TransactionFPDS transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance) transaction_fpds.save() # Update legal entity to map back to transaction legal_entity.transaction_unique_id = detached_award_proc_unique legal_entity.save()
def insert_new_fabs(to_insert): place_of_performance_field_map = { "location_country_code": "place_of_perform_country_c", "country_name": "place_of_perform_country_n", "state_code": "place_of_perfor_state_code", "state_name": "place_of_perform_state_nam", "city_name": "place_of_performance_city", "county_name": "place_of_perform_county_na", "county_code": "place_of_perform_county_co", "foreign_location_description": "place_of_performance_forei", "zip_4a": "place_of_performance_zip4a", "congressional_code": "place_of_performance_congr", "performance_code": "place_of_performance_code", "zip_last4": "place_of_perform_zip_last4", "zip5": "place_of_performance_zip5", } legal_entity_location_field_map = { "location_country_code": "legal_entity_country_code", "country_name": "legal_entity_country_name", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "city_name": "legal_entity_city_name", "city_code": "legal_entity_city_code", "county_name": "legal_entity_county_name", "county_code": "legal_entity_county_code", "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "foreign_location_description": "legal_entity_foreign_descr", "congressional_code": "legal_entity_congressional", "zip_last4": "legal_entity_zip_last4", "zip5": "legal_entity_zip5", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "foreign_city_name": "legal_entity_foreign_city", } fabs_normalized_field_map = { "type": "assistance_type", "description": "award_description", "funding_amount": "total_funding_amount", } fabs_field_map = { "officer_1_name": "high_comp_officer1_full_na", "officer_1_amount": "high_comp_officer1_amount", "officer_2_name": "high_comp_officer2_full_na", "officer_2_amount": "high_comp_officer2_amount", "officer_3_name": "high_comp_officer3_full_na", "officer_3_amount": "high_comp_officer3_amount", "officer_4_name": "high_comp_officer4_full_na", "officer_4_amount": "high_comp_officer4_amount", "officer_5_name": "high_comp_officer5_full_na", "officer_5_amount": "high_comp_officer5_amount", } update_award_ids = [] for row in to_insert: upper_case_dict_values(row) # Create new LegalEntityLocation and LegalEntity from the row data legal_entity_location = create_location( legal_entity_location_field_map, row, {"recipient_flag": True}) recipient_name = row['awardee_or_recipient_legal'] legal_entity = LegalEntity.objects.create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name if recipient_name is not None else "", parent_recipient_unique_id=row['ultimate_parent_unique_ide'], ) legal_entity_value_map = { "location": legal_entity_location, "business_categories": get_business_categories(row=row, data_type='fabs'), "business_types_description": row['business_types_desc'], } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True}) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only( row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only( row["funding_sub_tier_agency_co"]) # Create the summary Award (created, award) = Award.get_or_create_summary_award( generated_unique_award_id=row['unique_award_key'], fain=row['fain'], uri=row['uri'], record_type=row['record_type'], ) award.save() # Append row to list of Awards updated update_award_ids.append(award.id) try: last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date() except ValueError: last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date() parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), "last_modified_date": last_mod_date, "type_description": row['assistance_type_desc'], "transaction_unique_id": row['afa_generated_unique'], } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fabs_normalized_field_map, value_map=parent_txn_value_map, as_dict=True, ) financial_assistance_data = load_data_into_model( TransactionFABS(), # thrown away row, field_map=fabs_field_map, as_dict=True) # Hack to cut back on the number of warnings dumped to the log. financial_assistance_data['updated_at'] = cast_datetime_to_utc( financial_assistance_data['updated_at']) financial_assistance_data['created_at'] = cast_datetime_to_utc( financial_assistance_data['created_at']) financial_assistance_data['modified_at'] = cast_datetime_to_utc( financial_assistance_data['modified_at']) afa_generated_unique = financial_assistance_data[ 'afa_generated_unique'] unique_fabs = TransactionFABS.objects.filter( afa_generated_unique=afa_generated_unique) if unique_fabs.first(): transaction_normalized_dict["update_date"] = datetime.now( timezone.utc) transaction_normalized_dict["fiscal_year"] = fy( transaction_normalized_dict["action_date"]) # Update TransactionNormalized TransactionNormalized.objects.filter( id=unique_fabs.first().transaction.id).update( **transaction_normalized_dict) # Update TransactionFABS unique_fabs.update(**financial_assistance_data) else: # Create TransactionNormalized transaction_normalized = TransactionNormalized( **transaction_normalized_dict) transaction_normalized.save() # Create TransactionFABS transaction_fabs = TransactionFABS( transaction=transaction_normalized, **financial_assistance_data) transaction_fabs.save() # Update legal entity to map back to transaction legal_entity.transaction_unique_id = afa_generated_unique legal_entity.save() return update_award_ids
def insert_new_fabs(to_insert): place_of_performance_field_map = { "location_country_code": "place_of_perform_country_c", "country_name": "place_of_perform_country_n", "state_code": "place_of_perfor_state_code", "state_name": "place_of_perform_state_nam", "city_name": "place_of_performance_city", "county_name": "place_of_perform_county_na", "county_code": "place_of_perform_county_co", "foreign_location_description": "place_of_performance_forei", "zip_4a": "place_of_performance_zip4a", "congressional_code": "place_of_performance_congr", "performance_code": "place_of_performance_code", "zip_last4": "place_of_perform_zip_last4", "zip5": "place_of_performance_zip5", } legal_entity_location_field_map = { "location_country_code": "legal_entity_country_code", "country_name": "legal_entity_country_name", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "city_name": "legal_entity_city_name", "city_code": "legal_entity_city_code", "county_name": "legal_entity_county_name", "county_code": "legal_entity_county_code", "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "foreign_location_description": "legal_entity_foreign_descr", "congressional_code": "legal_entity_congressional", "zip_last4": "legal_entity_zip_last4", "zip5": "legal_entity_zip5", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "foreign_city_name": "legal_entity_foreign_city", } update_award_ids = [] for row in to_insert: upper_case_dict_values(row) # Create new LegalEntityLocation and LegalEntity from the row data legal_entity_location = create_location(legal_entity_location_field_map, row, {"recipient_flag": True}) recipient_name = row['awardee_or_recipient_legal'] legal_entity = LegalEntity.objects.create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name if recipient_name is not None else "", parent_recipient_unique_id=row['ultimate_parent_unique_ide'], ) legal_entity_value_map = { "location": legal_entity_location, "business_categories": get_business_categories(row=row, data_type='fabs'), "business_types_description": row['business_types_desc'], } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True}) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"]) # Generate the unique Award ID # "ASST_AW_" + awarding_sub_tier_agency_c + fain + uri # this will raise an exception if the cast to an int fails, that's ok since we don't want to process # non-numeric record type values record_type_int = int(row['record_type']) if record_type_int == 1: uri = row['uri'] if row['uri'] else '-NONE-' fain = '-NONE-' elif record_type_int in (2, 3): uri = '-NONE-' fain = row['fain'] if row['fain'] else '-NONE-' else: msg = "Invalid record type encountered for the following afa_generated_unique record: {}" raise Exception(msg.format(row['afa_generated_unique'])) astac = row["awarding_sub_tier_agency_c"] if row["awarding_sub_tier_agency_c"] else "-NONE-" generated_unique_id = "ASST_AW_{}_{}_{}".format(astac, fain, uri) # Create the summary Award (created, award) = Award.get_or_create_summary_award( generated_unique_award_id=generated_unique_id, fain=row['fain'], uri=row['uri'], record_type=row['record_type'], ) award.save() # Append row to list of Awards updated update_award_ids.append(award.id) try: last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date() except ValueError: last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date() parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), "last_modified_date": last_mod_date, "type_description": row['assistance_type_desc'], "transaction_unique_id": row['afa_generated_unique'], "generated_unique_award_id": generated_unique_id, } fad_field_map = { "type": "assistance_type", "description": "award_description", "funding_amount": "total_funding_amount", } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True, ) financial_assistance_data = load_data_into_model(TransactionFABS(), row, as_dict=True) # thrown away # Hack to cut back on the number of warnings dumped to the log. financial_assistance_data['updated_at'] = cast_datetime_to_utc(financial_assistance_data['updated_at']) financial_assistance_data['created_at'] = cast_datetime_to_utc(financial_assistance_data['created_at']) financial_assistance_data['modified_at'] = cast_datetime_to_utc(financial_assistance_data['modified_at']) afa_generated_unique = financial_assistance_data['afa_generated_unique'] unique_fabs = TransactionFABS.objects.filter(afa_generated_unique=afa_generated_unique) if unique_fabs.first(): transaction_normalized_dict["update_date"] = datetime.now(timezone.utc) transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"]) # Update TransactionNormalized TransactionNormalized.objects.filter(id=unique_fabs.first().transaction.id).update( **transaction_normalized_dict ) # Update TransactionFABS unique_fabs.update(**financial_assistance_data) else: # Create TransactionNormalized transaction_normalized = TransactionNormalized(**transaction_normalized_dict) transaction_normalized.save() # Create TransactionFABS transaction_fabs = TransactionFABS(transaction=transaction_normalized, **financial_assistance_data) transaction_fabs.save() # Update legal entity to map back to transaction legal_entity.transaction_unique_id = afa_generated_unique legal_entity.save() return update_award_ids
def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000): query = "SELECT * FROM published_award_financial_assistance" arguments = [] fy_begin = "10/01/" + str(fiscal_year - 1) fy_end = "09/30/" + str(fiscal_year) if fiscal_year: if arguments: query += " AND" else: query += " WHERE" query += " action_date::Date BETWEEN %s AND %s" arguments += [fy_begin] arguments += [fy_end] query += " ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s" arguments += [limit, (page - 1) * limit] logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3])) db_cursor.execute(query, arguments) logger.info("Running dictfetchall on db_cursor") award_financial_assistance_data = dictfetchall(db_cursor) fabs_normalized_field_map = { "type": "assistance_type", "description": "award_description", "funding_amount": "total_funding_amount", } fabs_field_map = { "officer_1_name": "high_comp_officer1_full_na", "officer_1_amount": "high_comp_officer1_amount", "officer_2_name": "high_comp_officer2_full_na", "officer_2_amount": "high_comp_officer2_amount", "officer_3_name": "high_comp_officer3_full_na", "officer_3_amount": "high_comp_officer3_amount", "officer_4_name": "high_comp_officer4_full_na", "officer_4_amount": "high_comp_officer4_amount", "officer_5_name": "high_comp_officer5_full_na", "officer_5_amount": "high_comp_officer5_amount", } logger.info("Getting total rows") total_rows = len(award_financial_assistance_data) # - rows_loaded logger.info("Processing " + str(total_rows) + " rows of assistance data") # ROW ITERATION STARTS HERE award_bulk = [] transaction_assistance_bulk = [] transaction_normalized_bulk = [] awarding_agency_list = [] funding_agency_list = [] logger.info("Getting award objects for {} rows...".format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row["awarding_agency_code"] is None or len( row["awarding_agency_code"].strip()) < 1: awarding_subtier_agency_id = subtier_agency_map[ row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = subtier_to_agency_map[ awarding_subtier_agency_id]["toptier_agency_id"] awarding_toptier_code = toptier_agency_map[ awarding_toptier_agency_id] row["awarding_agency_code"] = awarding_toptier_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row["funding_agency_code"] is None or len( row["funding_agency_code"].strip()) < 1: funding_subtier_agency_id = subtier_agency_map.get( row["funding_sub_tier_agency_co"]) if funding_subtier_agency_id is not None: funding_toptier_agency_id = subtier_to_agency_map[ funding_subtier_agency_id]["toptier_agency_id"] funding_toptier_code = toptier_agency_map[ funding_toptier_agency_id] else: funding_toptier_code = None row["funding_agency_code"] = funding_toptier_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row["awarding_agency_code"], row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_toptier_subtier( row["funding_agency_code"], row["funding_sub_tier_agency_co"]) awarding_agency_list.append(awarding_agency) funding_agency_list.append(funding_agency) # award.save() is called in Award.get_or_create_summary_award by default created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, fain=row.get("fain"), uri=row.get("uri"), generated_unique_award_id=row.get("unique_award_key"), save=False, ) award_bulk.append(award) award_update_id_list.append(award.id) award_assistance_update_id_list.append(award.id) logger.info("Bulk creating {} award rows...".format(len(award_bulk))) try: Award.objects.bulk_create(award_bulk) except IntegrityError: logger.info("!!! DUPLICATES FOUND. Continuing... ") logger.info("Getting transaction_normalized for {} rows...".format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): parent_txn_value_map = { "award": award_bulk[index - 1], "awarding_agency": awarding_agency_list[index - 1], "funding_agency": funding_agency_list[index - 1], "period_of_performance_start_date": format_date(row["period_of_performance_star"]), "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]), "action_date": format_date(row["action_date"]), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fabs_normalized_field_map, value_map=parent_txn_value_map, as_dict=True, ) transaction_normalized = TransactionNormalized.get_or_create_transaction( **transaction_dict) transaction_normalized.fiscal_year = fy( transaction_normalized.action_date) transaction_normalized_bulk.append(transaction_normalized) logger.info("Bulk creating {} TransactionNormalized rows...".format( len(transaction_normalized_bulk))) try: TransactionNormalized.objects.bulk_create( transaction_normalized_bulk) except IntegrityError: logger.info( "Tried and failed to insert duplicate transaction_normalized row. Continuing... " ) for index, row in enumerate(award_financial_assistance_data, 1): financial_assistance_data = load_data_into_model( TransactionFABS(), row, field_map=fabs_field_map, as_dict=True # thrown away ) transaction_assistance = TransactionFABS( transaction=transaction_normalized_bulk[index - 1], **financial_assistance_data) transaction_assistance_bulk.append(transaction_assistance) logger.info("Bulk creating TransactionFABS rows...") try: TransactionFABS.objects.bulk_create(transaction_assistance_bulk) except IntegrityError: logger.info("!!! DUPLICATES FOUND. Continuing... ")
def update_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000): # logger.info("Getting IDs for what's currently in the DB...") # current_ids = TransactionFPDS.objects # # if fiscal_year: # current_ids = current_ids.filter(action_date__fy=fiscal_year) # # current_ids = current_ids.values_list('detached_award_procurement_id', flat=True) query = "SELECT * FROM detached_award_procurement" arguments = [] fy_begin = "10/01/" + str(fiscal_year - 1) fy_end = "09/30/" + str(fiscal_year) if fiscal_year: if arguments: query += " AND" else: query += " WHERE" query += " action_date::Date BETWEEN %s AND %s" arguments += [fy_begin] arguments += [fy_end] query += " ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s" arguments += [limit, (page - 1) * limit] logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3])) db_cursor.execute(query, arguments) logger.info("Running dictfetchall on db_cursor") procurement_data = dictfetchall(db_cursor) fpds_normalized_field_map = { "type": "contract_award_type", "description": "award_description" } fpds_field_map = { "officer_1_name": "high_comp_officer1_full_na", "officer_1_amount": "high_comp_officer1_amount", "officer_2_name": "high_comp_officer2_full_na", "officer_2_amount": "high_comp_officer2_amount", "officer_3_name": "high_comp_officer3_full_na", "officer_3_amount": "high_comp_officer3_amount", "officer_4_name": "high_comp_officer4_full_na", "officer_4_amount": "high_comp_officer4_amount", "officer_5_name": "high_comp_officer5_full_na", "officer_5_amount": "high_comp_officer5_amount", } logger.info("Getting total rows") total_rows = len(procurement_data) # - rows_loaded logger.info("Processing " + str(total_rows) + " rows of procurement data") start_time = datetime.now() for index, row in enumerate(procurement_data, 1): with db_transaction.atomic(): if not (index % 100): logger.info( "D1 File Load: Loading row {} of {} ({})".format( str(index), str(total_rows), datetime.now() - start_time)) # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row["awarding_agency_code"] is None or len( row["awarding_agency_code"].strip()) < 1: awarding_subtier_agency_id = subtier_agency_map[ row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = subtier_to_agency_map[ awarding_subtier_agency_id]["toptier_agency_id"] awarding_toptier_code = toptier_agency_map[ awarding_toptier_agency_id] row["awarding_agency_code"] = awarding_toptier_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row["funding_agency_code"] is None or len( row["funding_agency_code"].strip()) < 1: funding_subtier_agency_id = subtier_agency_map.get( row["funding_sub_tier_agency_co"]) if funding_subtier_agency_id is not None: funding_toptier_agency_id = subtier_to_agency_map[ funding_subtier_agency_id]["toptier_agency_id"] funding_toptier_code = toptier_agency_map[ funding_toptier_agency_id] else: funding_toptier_code = None row["funding_agency_code"] = funding_toptier_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row["awarding_agency_code"], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get("piid"), fain=row.get("fain"), uri=row.get("uri"), parent_award_piid=row.get("parent_award_id"), generated_unique_award_id=row.get("unique_award_key"), ) award.save() award_update_id_list.append(award.id) award_contract_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier( row["funding_agency_code"], row["funding_sub_tier_agency_co"]), "period_of_performance_start_date": format_date(row["period_of_performance_star"]), "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]), "action_date": format_date(row["action_date"]), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fpds_normalized_field_map, value_map=parent_txn_value_map, as_dict=True, ) transaction = TransactionNormalized.get_or_create_transaction( **transaction_dict) transaction.save() contract_instance = load_data_into_model( TransactionFPDS(), row, field_map=fpds_field_map, as_dict=True # thrown away ) transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance) # catch exception and do nothing if we see # "django.db.utils.IntegrityError: duplicate key value violates unique constraint" try: transaction_contract.save() except IntegrityError: pass
def get_or_create_summary_award(awarding_agency=None, piid=None, fain=None, uri=None, parent_award_id=None, save=True): """ Given a set of award identifiers and awarding agency information, find a corresponding Award record. If we can't find one, create it. Returns: created: a list of new awards created, used to enable bulk insert summary_award: the summary award that the calling process can map to """ # If an award transaction's ID is a piid, it's contract data # If the ID is fain or a uri, it's financial assistance. If the award transaction # has both a fain and a uri, include both. try: # Look for an existing award record # Check individual FILE C D linkage first lookup_kwargs = {'recipient_id__isnull': False} if piid is not None: lookup_kwargs['piid'] = piid if parent_award_id is not None: lookup_kwargs['parent_award__piid'] = parent_award_id if fain is not None: lookup_kwargs['fain'] = fain if uri is not None: lookup_kwargs['uri'] = uri award_queryset = Award.objects.filter(**lookup_kwargs)[:2] award_count = len(award_queryset) if award_count == 1: summary_award = award_queryset[0] return [], summary_award # if nothing found revert to looking for an award that this record could've already created lookup_kwargs = {"awarding_agency": awarding_agency} for i in [(piid, "piid"), (fain, "fain"), (uri, "uri")]: lookup_kwargs[i[1]] = i[0] if parent_award_id: # parent_award__piid lookup_kwargs["parent_award_piid"] = parent_award_id # Look for an existing award record summary_award = Award.objects \ .filter(Q(**lookup_kwargs)) \ .filter(awarding_agency=awarding_agency) \ .first() if (summary_award is None and awarding_agency is not None and awarding_agency.toptier_agency.name != awarding_agency.subtier_agency.name): # No award match found when searching by award id info + # awarding subtier agency. Relax the awarding agency # critera to just the toptier agency instead of the subtier # agency and try the search again. awarding_agency_toptier = Agency.get_by_toptier( awarding_agency.toptier_agency.cgac_code) summary_award = Award.objects \ .filter(Q(**lookup_kwargs)) \ .filter(awarding_agency=awarding_agency_toptier) \ .first() if summary_award: return [], summary_award parent_created, parent_award = [], None # Now create the award record for this award transaction create_kwargs = { "awarding_agency": awarding_agency, "parent_award": parent_award, "parent_award_piid": parent_award_id } for i in [(piid, "piid"), (fain, "fain"), (uri, "uri")]: create_kwargs[i[1]] = i[0] summary_award = Award(**create_kwargs) created = [ summary_award, ] created.extend(parent_created) if save: summary_award.save() return created, summary_award # Do not use bare except except ValueError: raise ValueError( 'Unable to find or create an award with the provided information: ' 'piid={}, fain={}, uri={}, parent_id={}, awarding_agency={}'. format(piid, fain, uri, parent_award_id, awarding_agency))
def load_awards(self, fpds_broker_data, total_rows): start_time = datetime.now() for index, row in enumerate(fpds_broker_data, 1): if not (index % 10000): logger.info('Awards: Loading row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1: awarding_subtier_agency_id = self.subtier_agency_map[row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = self.subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id'] awarding_cgac_code = self.toptier_agency_map[awarding_toptier_agency_id] row['awarding_agency_code'] = awarding_cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1: funding_subtier_agency_id = self.subtier_agency_map.get(row["funding_sub_tier_agency_co"]) if funding_subtier_agency_id is not None: funding_toptier_agency_id = self.subtier_to_agency_map[funding_subtier_agency_id][ 'toptier_agency_id'] funding_cgac_code = self.toptier_agency_map[funding_toptier_agency_id] else: funding_cgac_code = None row['funding_agency_code'] = funding_cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = self.agency_no_sub_map.get(( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"] )) if awarding_agency is None: awarding_agency = self.agency_sub_only_map.get(row['awarding_agency_code']) # If we still haven't found the agency, try surmising it from subtier if awarding_agency is None: awarding_agency = self.agency_subtier_map.get(row['awarding_sub_tier_agency_c']) funding_agency = self.agency_no_sub_map.get(( row['funding_agency_code'], row["funding_sub_tier_agency_co"] )) if funding_agency is None: funding_agency = self.agency_sub_only_map.get(row['funding_agency_code']) if funding_agency is None: funding_agency = self.agency_subtier_map.get(row['funding_sub_tier_agency_co']) awarding_agency_list.append(awarding_agency) funding_agency_list.append(funding_agency) piid = row.get('piid') award = self.award_map.get(piid) if award and awarding_agency is not None and award.awarding_agency_id is not None: if award.awarding_agency_id != awarding_agency.id: award = None if not award: # create the award since it wasn't found create_kwargs = {'awarding_agency': awarding_agency, 'piid': piid} award = Award(**create_kwargs) award.parent_award = parent_award_lookup[index - 1] self.award_map[piid] = award award_bulk.append(award) award_lookup.append(award) logger.info('Bulk creating Awards (batch_size: {})...'.format(BATCH_SIZE)) Award.objects.bulk_create(award_bulk, batch_size=BATCH_SIZE)
def load_file_d2( submission_attributes, award_financial_assistance_data, db_cursor, quick, row_preprocessor=no_preprocessing ): """ Process and load file D2 broker data (financial assistance award txns). """ d_start_time = time.time() if quick: setup_broker_fdw() parameters = {'broker_submission_id': submission_attributes.broker_submission_id} run_sql_file('usaspending_api/etl/management/load_file_d2.sql', parameters) logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time)) return legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "city_code": "legal_entity_city_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "county_code": "legal_entity_county_code", "county_name": "legal_entity_county_name", "foreign_city_name": "legal_entity_foreign_city", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "zip5": "legal_entity_zip5", "zip_last4": "legal_entity_zip_last4", "location_country_code": "legal_entity_country_code" } place_of_performance_field_map = { "city_name": "place_of_performance_city", "performance_code": "place_of_performance_code", "congressional_code": "place_of_performance_congr", "county_name": "place_of_perform_county_na", "foreign_location_description": "place_of_performance_forei", "state_name": "place_of_perform_state_nam", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } legal_entity_location_value_map = { "recipient_flag": True } place_of_performance_value_map = { "place_of_performance_flag": True } fad_field_map = { "type": "assistance_type", "description": "award_description", } total_rows = len(award_financial_assistance_data) start_time = datetime.now() for index, row in enumerate(award_financial_assistance_data, 1): if not (index % 100): logger.info('D2 File Load: Loading row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) row = row_preprocessor(row) legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map ) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name ) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map ) # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1: funding_agency = Agency.get_by_subtier(row["funding_sub_tier_agency_co"]) row['funding_agency_code'] = ( funding_agency.toptier_agency.cgac_code if funding_agency is not None else None) # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"] ) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri')) award.save() award_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict) transaction.save() fad_value_map = { "submission": submission_attributes, "cfda": Cfda.objects.filter(program_number=row['cfda_number']).first(), 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), } financial_assistance_data = load_data_into_model( TransactionFABS(), # thrown away row, field_map=fad_field_map, value_map=fad_value_map, as_dict=True) transaction_assistance = TransactionFABS.get_or_create_2(transaction=transaction, **financial_assistance_data) transaction_assistance.save() logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time))
def insert_new_fabs(to_insert): fabs_normalized_field_map = { "type": "assistance_type", "description": "award_description", "funding_amount": "total_funding_amount", } fabs_field_map = { "officer_1_name": "high_comp_officer1_full_na", "officer_1_amount": "high_comp_officer1_amount", "officer_2_name": "high_comp_officer2_full_na", "officer_2_amount": "high_comp_officer2_amount", "officer_3_name": "high_comp_officer3_full_na", "officer_3_amount": "high_comp_officer3_amount", "officer_4_name": "high_comp_officer4_full_na", "officer_4_amount": "high_comp_officer4_amount", "officer_5_name": "high_comp_officer5_full_na", "officer_5_amount": "high_comp_officer5_amount", } update_award_ids = [] for row in to_insert: upper_case_dict_values(row) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only( row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only( row["funding_sub_tier_agency_co"]) # Create the summary Award (created, award) = Award.get_or_create_summary_award( generated_unique_award_id=row["unique_award_key"], fain=row["fain"], uri=row["uri"], record_type=row["record_type"], ) award.save() # Append row to list of Awards updated update_award_ids.append(award.id) try: last_mod_date = datetime.strptime(str(row["modified_at"]), "%Y-%m-%d %H:%M:%S.%f").date() except ValueError: last_mod_date = datetime.strptime(str(row["modified_at"]), "%Y-%m-%d %H:%M:%S").date() parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "period_of_performance_start_date": format_date(row["period_of_performance_star"]), "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]), "action_date": format_date(row["action_date"]), "last_modified_date": last_mod_date, "type_description": row["assistance_type_desc"], "transaction_unique_id": row["afa_generated_unique"], "business_categories": get_business_categories(row=row, data_type="fabs"), } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fabs_normalized_field_map, value_map=parent_txn_value_map, as_dict=True, ) financial_assistance_data = load_data_into_model( TransactionFABS(), row, field_map=fabs_field_map, as_dict=True # thrown away ) # Hack to cut back on the number of warnings dumped to the log. financial_assistance_data["updated_at"] = cast_datetime_to_utc( financial_assistance_data["updated_at"]) financial_assistance_data["created_at"] = cast_datetime_to_utc( financial_assistance_data["created_at"]) financial_assistance_data["modified_at"] = cast_datetime_to_utc( financial_assistance_data["modified_at"]) afa_generated_unique = financial_assistance_data[ "afa_generated_unique"] unique_fabs = TransactionFABS.objects.filter( afa_generated_unique=afa_generated_unique) if unique_fabs.first(): transaction_normalized_dict["update_date"] = datetime.now( timezone.utc) transaction_normalized_dict["fiscal_year"] = fy( transaction_normalized_dict["action_date"]) # Update TransactionNormalized TransactionNormalized.objects.filter( id=unique_fabs.first().transaction.id).update( **transaction_normalized_dict) # Update TransactionFABS unique_fabs.update(**financial_assistance_data) else: # Create TransactionNormalized transaction_normalized = TransactionNormalized( **transaction_normalized_dict) transaction_normalized.save() # Create TransactionFABS transaction_fabs = TransactionFABS( transaction=transaction_normalized, **financial_assistance_data) transaction_fabs.save() return update_award_ids
def insert_new_fabs(self, to_insert, total_rows): logger.info('Starting insertion of new FABS data') place_of_performance_field_map = { "location_country_code": "place_of_perform_country_c", "country_name": "place_of_perform_country_n", "state_code": "place_of_perfor_state_code", "state_name": "place_of_perform_state_nam", "city_name": "place_of_performance_city", "county_name": "place_of_perform_county_na", "county_code": "place_of_perform_county_co", "foreign_location_description": "place_of_performance_forei", "zip_4a": "place_of_performance_zip4a", "congressional_code": "place_of_performance_congr", "performance_code": "place_of_performance_code", "zip_last4": "place_of_perform_zip_last4", "zip5": "place_of_performance_zip5" } legal_entity_location_field_map = { "location_country_code": "legal_entity_country_code", "country_name": "legal_entity_country_name", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "city_name": "legal_entity_city_name", "city_code": "legal_entity_city_code", "county_name": "legal_entity_county_name", "county_code": "legal_entity_county_code", "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "foreign_location_description": "legal_entity_foreign_descr", "congressional_code": "legal_entity_congressional", "zip_last4": "legal_entity_zip_last4", "zip5": "legal_entity_zip5", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "foreign_city_name": "legal_entity_foreign_city" } start_time = datetime.now() for index, row in enumerate(to_insert, 1): if not (index % 1000): logger.info('Inserting Stale FABS: Inserting row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) for key in row: if isinstance(row[key], str): row[key] = row[key].upper() # Create new LegalEntityLocation and LegalEntity from the row data legal_entity_location = create_location(legal_entity_location_field_map, row, {"recipient_flag": True}) recipient_name = row['awardee_or_recipient_legal'] legal_entity = LegalEntity.objects.create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name if recipient_name is not None else "" ) legal_entity_value_map = { "location": legal_entity_location, "business_categories": get_business_categories(row=row, data_type='fabs'), "business_types_description": get_business_type_description(row['business_types']) } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True}) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"]) # Generate the unique Award ID # "ASST_AW_" + awarding_sub_tier_agency_c + fain + uri # this will raise an exception if the cast to an int fails, that's ok since we don't want to process # non-numeric record type values record_type_int = int(row['record_type']) if record_type_int == 1: uri = row['uri'] if row['uri'] else '-NONE-' fain = '-NONE-' elif record_type_int == 2: uri = '-NONE-' fain = row['fain'] if row['fain'] else '-NONE-' else: raise Exception('Invalid record type encountered for the following afa_generated_unique record: %s' % row['afa_generated_unique']) generated_unique_id = 'ASST_AW_' +\ (row['awarding_sub_tier_agency_c'] if row['awarding_sub_tier_agency_c'] else '-NONE-') + '_' + \ fain + '_' + uri # Create the summary Award (created, award) = Award.get_or_create_summary_award(generated_unique_award_id=generated_unique_id, fain=row['fain'], uri=row['uri'], record_type=row['record_type']) award.save() # Append row to list of Awards updated award_update_id_list.append(award.id) try: last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date() except ValueError: last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date() parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), "last_modified_date": last_mod_date, "type_description": get_assistance_type_description(row['assistance_type']), "transaction_unique_id": row['afa_generated_unique'], "generated_unique_award_id": generated_unique_id } fad_field_map = { "type": "assistance_type", "description": "award_description", } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) financial_assistance_data = load_data_into_model( TransactionFABS(), # thrown away row, as_dict=True) afa_generated_unique = financial_assistance_data['afa_generated_unique'] unique_fabs = TransactionFABS.objects.filter(afa_generated_unique=afa_generated_unique) if unique_fabs.first(): transaction_normalized_dict["update_date"] = datetime.utcnow() transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"]) # Update TransactionNormalized TransactionNormalized.objects.filter(id=unique_fabs.first().transaction.id).\ update(**transaction_normalized_dict) # Update TransactionFABS unique_fabs.update(**financial_assistance_data) else: # Create TransactionNormalized transaction = TransactionNormalized(**transaction_normalized_dict) transaction.save() # Create TransactionFABS transaction_fabs = TransactionFABS(transaction=transaction, **financial_assistance_data) transaction_fabs.save()
def load_file_c(submission_attributes, db_cursor, award_financial_frame): """ Process and load file C broker data. Note: this should run AFTER the D1 and D2 files are loaded because we try to join to those records to retrieve some additional information about the awarding sub-tier agency. """ # this matches the file b reverse directive, but am repeating it here # to ensure that we don't overwrite it as we change up the order of # file loading reverse = re.compile(r'(_(cpe|fyb)$)|^transaction_obligated_amount$') award_financial_frame['txn'] = award_financial_frame.apply( get_award_financial_transaction, axis=1) award_financial_frame['awarding_agency'] = award_financial_frame.apply( get_awarding_agency, axis=1) award_financial_frame['object_class'] = award_financial_frame.apply( get_or_create_object_class_rw, axis=1, logger=logger) award_financial_frame['program_activity'] = award_financial_frame.apply( get_or_create_program_activity, axis=1, submission_attributes=submission_attributes) # for row in award_financial_data: for row in award_financial_frame.replace({ np.nan: None }).to_dict(orient='records'): # Check and see if there is an entry for this TAS treasury_account = get_treasury_appropriation_account_tas_lookup( row.get('tas_id'), db_cursor) if treasury_account is None: raise Exception('Could not find appropriation account for TAS: ' + row['tas']) # Find a matching transaction record, so we can use its # subtier agency information to match to (or create) an Award record # Find the award that this award transaction belongs to. If it doesn't exist, create it. created, award = Award.get_or_create_summary_award( awarding_agency=row['awarding_agency'], piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_id=row.get('parent_award_id'), use_cache=False) award_financial_data = FinancialAccountsByAwards() value_map = { 'award': award, 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, 'treasury_account': treasury_account, 'object_class': row.get('object_class'), 'program_activity': row.get('program_activity'), } # Still using the cpe|fyb regex compiled above for reverse afd = load_data_into_model(award_financial_data, row, value_map=value_map, save=True, reverse=reverse) awards_cache.clear()
def load_file_d1(submission_attributes, procurement_data, db_cursor, quick=False): """ Process and load file D1 broker data (contract award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name "city_name": "place_of_performance_locat", "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = {"place_of_performance_flag": True} legal_entity_location_value_map = {"recipient_flag": True} contract_field_map = { "type": "contract_award_type", "description": "award_description" } d_start_time = time.time() if quick: parameters = { 'broker_submission_id': submission_attributes.broker_submission_id } run_sql_file('usaspending_api/etl/management/load_file_d1.sql', parameters) logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time)) return total_rows = len(procurement_data) start_time = datetime.now() for index, row in enumerate(procurement_data, 1): if not (index % 100): logger.info('D1 File Load: Loading row {} of {} ({})'.format( str(index), str(total_rows), datetime.now() - start_time)) legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, copy(legal_entity_location_value_map)) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, copy(place_of_performance_value_map)) # If awarding toptier agency code (aka CGAC) is not supplied on the D1 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len( row['awarding_agency_code'].strip()) < 1: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len( row['funding_agency_code'].strip()) < 1: funding_agency = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]) row['funding_agency_code'] = ( funding_agency.toptier_agency.cgac_code if funding_agency is not None else None) # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_piid=row.get( 'parent_award_id')) # It is a FAIN/PIID/URI, not our db's pk award.save() award_update_id_list.append(award.id) award_contract_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction( **transaction_dict) transaction.save() contract_value_map = { 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e']) } contract_instance = load_data_into_model( TransactionFPDS(), # thrown away row, field_map=contract_field_map, value_map=contract_value_map, as_dict=True) transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance) transaction_contract.save() logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))
def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000): # logger.info("Getting IDs for what's currently in the DB...") # current_ids = TransactionFABS.objects # # if fiscal_year: # current_ids = current_ids.filter(action_date__fy=fiscal_year) # # current_ids = current_ids.values_list('published_award_financial_assistance_id', flat=True) query = "SELECT * FROM published_award_financial_assistance" arguments = [] fy_begin = '10/01/' + str(fiscal_year - 1) fy_end = '09/30/' + str(fiscal_year) if fiscal_year: if arguments: query += " AND" else: query += " WHERE" query += ' action_date::Date BETWEEN %s AND %s' arguments += [fy_begin] arguments += [fy_end] query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s' arguments += [limit, (page - 1) * limit] logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3])) db_cursor.execute(query, arguments) logger.info("Running dictfetchall on db_cursor") award_financial_assistance_data = dictfetchall(db_cursor) legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "county_code": "legal_entity_county_code", "county_name": "legal_entity_county_name", "foreign_city_name": "legal_entity_foreign_city", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "zip5": "legal_entity_zip5", "zip_last4": "legal_entity_zip_last4", "location_country_code": "legal_entity_country_code" } place_of_performance_field_map = { "city_name": "place_of_performance_city", "performance_code": "place_of_performance_code", "congressional_code": "place_of_performance_congr", "county_name": "place_of_perform_county_na", "foreign_location_description": "place_of_performance_forei", "state_name": "place_of_perform_state_nam", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } fad_field_map = { "type": "assistance_type", "description": "award_description", } logger.info("Getting total rows") # rows_loaded = len(current_ids) total_rows = len(award_financial_assistance_data) # - rows_loaded logger.info("Processing " + str(total_rows) + " rows of assistance data") # skip_count = 0 # ROW ITERATION STARTS HERE lel_bulk = [] pop_bulk = [] legal_entity_bulk = [] award_bulk = [] transaction_assistance_bulk = [] transaction_normalized_bulk = [] logger.info( 'Getting legal entity location objects for {} rows...'.format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): # Recipient flag is true for LeL legal_entity_location = get_or_create_location( legal_entity_location_field_map, row, {"recipient_flag": True}, save=False) lel_bulk.append(legal_entity_location) logger.info('Bulk creating {} legal entity location rows...'.format( len(lel_bulk))) try: Location.objects.bulk_create(lel_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') logger.info( 'Getting place of performance objects for {} rows...'.format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): # Place of Performance flag is true for PoP pop_location = get_or_create_location( place_of_performance_field_map, row, {"place_of_performance_flag": True}, save=False) pop_bulk.append(pop_location) logger.info('Bulk creating {} place of performance rows...'.format( len(pop_bulk))) try: Location.objects.bulk_create(pop_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') logger.info('Getting legal entity objects for {} rows...'.format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): recipient_name = row.get('awardee_or_recipient_legal', '') legal_entity = LegalEntity.objects.filter( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name).first() if legal_entity is None: legal_entity = LegalEntity( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name) legal_entity_value_map = { "location": lel_bulk[index - 1], } legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=False) legal_entity_bulk.append(legal_entity) logger.info('Bulk creating {} legal entity rows...'.format( len(legal_entity_bulk))) try: LegalEntity.objects.bulk_create(legal_entity_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') awarding_agency_list = [] funding_agency_list = [] logger.info('Getting award objects for {} rows...'.format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len( row['awarding_agency_code'].strip()) < 1: awarding_subtier_agency_id = subtier_agency_map[ row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = subtier_to_agency_map[ awarding_subtier_agency_id]['toptier_agency_id'] awarding_cgac_code = toptier_agency_map[ awarding_toptier_agency_id] row['awarding_agency_code'] = awarding_cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len( row['funding_agency_code'].strip()) < 1: funding_subtier_agency_id = subtier_agency_map.get( row["funding_sub_tier_agency_co"]) if funding_subtier_agency_id is not None: funding_toptier_agency_id = \ subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id'] funding_cgac_code = toptier_agency_map[ funding_toptier_agency_id] else: funding_cgac_code = None row['funding_agency_code'] = funding_cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_toptier_subtier( row['funding_agency_code'], row["funding_sub_tier_agency_co"]) awarding_agency_list.append(awarding_agency) funding_agency_list.append(funding_agency) # award.save() is called in Award.get_or_create_summary_award by default created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, fain=row.get('fain'), uri=row.get('uri'), save=False) award_bulk.append(award) award_update_id_list.append(award.id) logger.info('Bulk creating {} award rows...'.format(len(award_bulk))) try: Award.objects.bulk_create(award_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') logger.info('Getting transaction_normalized for {} rows...'.format( len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): parent_txn_value_map = { "award": award_bulk[index - 1], "awarding_agency": awarding_agency_list[index - 1], "funding_agency": funding_agency_list[index - 1], "recipient": legal_entity_bulk[index - 1], "place_of_performance": pop_bulk[index - 1], "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) transaction_normalized = TransactionNormalized.get_or_create_transaction( **transaction_dict) transaction_normalized.fiscal_year = fy( transaction_normalized.action_date) transaction_normalized_bulk.append(transaction_normalized) logger.info('Bulk creating {} TransactionNormalized rows...'.format( len(transaction_normalized_bulk))) try: TransactionNormalized.objects.bulk_create( transaction_normalized_bulk) except IntegrityError: logger.info( 'Tried and failed to insert duplicate transaction_normalized row. Continuing... ' ) for index, row in enumerate(award_financial_assistance_data, 1): financial_assistance_data = load_data_into_model( TransactionFABS(), # thrown away row, as_dict=True) transaction_assistance = TransactionFABS( transaction=transaction_normalized_bulk[index - 1], **financial_assistance_data) transaction_assistance_bulk.append(transaction_assistance) logger.info('Bulk creating TransactionFABS rows...') try: TransactionFABS.objects.bulk_create(transaction_assistance_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ')
def load_file_d2(submission_attributes, award_financial_assistance_data, db_cursor, quick, row_preprocessor=no_preprocessing): """ Process and load file D2 broker data (financial assistance award txns). """ d_start_time = time.time() if quick: setup_broker_fdw() parameters = { 'broker_submission_id': submission_attributes.broker_submission_id } run_sql_file('usaspending_api/etl/management/load_file_d2.sql', parameters) logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time)) return legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "city_code": "legal_entity_city_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "county_code": "legal_entity_county_code", "county_name": "legal_entity_county_name", "foreign_city_name": "legal_entity_foreign_city", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "zip5": "legal_entity_zip5", "zip_last4": "legal_entity_zip_last4", "location_country_code": "legal_entity_country_code" } place_of_performance_field_map = { "city_name": "place_of_performance_city", "performance_code": "place_of_performance_code", "congressional_code": "place_of_performance_congr", "county_name": "place_of_perform_county_na", "foreign_location_description": "place_of_performance_forei", "state_name": "place_of_perform_state_nam", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } legal_entity_location_value_map = {"recipient_flag": True} place_of_performance_value_map = {"place_of_performance_flag": True} fad_field_map = { "type": "assistance_type", "description": "award_description", } total_rows = len(award_financial_assistance_data) start_time = datetime.now() for index, row in enumerate(award_financial_assistance_data, 1): if not (index % 100): logger.info('D2 File Load: Loading row {} of {} ({})'.format( str(index), str(total_rows), datetime.now() - start_time)) row = row_preprocessor(row) legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map) # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len( row['awarding_agency_code'].strip()) < 1: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len( row['funding_agency_code'].strip()) < 1: funding_agency = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]) row['funding_agency_code'] = ( funding_agency.toptier_agency.cgac_code if funding_agency is not None else None) # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri')) award.save() award_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction( **transaction_dict) transaction.save() fad_value_map = { "submission": submission_attributes, "cfda": Cfda.objects.filter(program_number=row['cfda_number']).first(), 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), } financial_assistance_data = load_data_into_model( TransactionFABS(), # thrown away row, field_map=fad_field_map, value_map=fad_value_map, as_dict=True) transaction_assistance = TransactionFABS.get_or_create_2( transaction=transaction, **financial_assistance_data) transaction_assistance.save() logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time))
def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000): # logger.info("Getting IDs for what's currently in the DB...") # current_ids = TransactionFABS.objects # # if fiscal_year: # current_ids = current_ids.filter(action_date__fy=fiscal_year) # # current_ids = current_ids.values_list('published_award_financial_assistance_id', flat=True) query = "SELECT * FROM published_award_financial_assistance" arguments = [] fy_begin = '10/01/' + str(fiscal_year - 1) fy_end = '09/30/' + str(fiscal_year) if fiscal_year: if arguments: query += " AND" else: query += " WHERE" query += ' action_date::Date BETWEEN %s AND %s' arguments += [fy_begin] arguments += [fy_end] query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s' arguments += [limit, (page-1)*limit] logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3])) db_cursor.execute(query, arguments) logger.info("Running dictfetchall on db_cursor") award_financial_assistance_data = dictfetchall(db_cursor) legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "county_code": "legal_entity_county_code", "county_name": "legal_entity_county_name", "foreign_city_name": "legal_entity_foreign_city", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "zip5": "legal_entity_zip5", "zip_last4": "legal_entity_zip_last4", "location_country_code": "legal_entity_country_code" } place_of_performance_field_map = { "city_name": "place_of_performance_city", "performance_code": "place_of_performance_code", "congressional_code": "place_of_performance_congr", "county_name": "place_of_perform_county_na", "foreign_location_description": "place_of_performance_forei", "state_name": "place_of_perform_state_nam", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } fad_field_map = { "type": "assistance_type", "description": "award_description", } logger.info("Getting total rows") # rows_loaded = len(current_ids) total_rows = len(award_financial_assistance_data) # - rows_loaded logger.info("Processing " + str(total_rows) + " rows of assistance data") # skip_count = 0 # ROW ITERATION STARTS HERE lel_bulk = [] pop_bulk = [] legal_entity_bulk = [] award_bulk = [] transaction_assistance_bulk = [] transaction_normalized_bulk = [] logger.info('Getting legal entity location objects for {} rows...'.format(len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): # Recipient flag is true for LeL legal_entity_location = get_or_create_location( legal_entity_location_field_map, row, {"recipient_flag": True}, save=False ) lel_bulk.append(legal_entity_location) logger.info('Bulk creating {} legal entity location rows...'.format(len(lel_bulk))) try: Location.objects.bulk_create(lel_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') logger.info('Getting place of performance objects for {} rows...'.format(len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): # Place of Performance flag is true for PoP pop_location = get_or_create_location( place_of_performance_field_map, row, {"place_of_performance_flag": True}, save=False ) pop_bulk.append(pop_location) logger.info('Bulk creating {} place of performance rows...'.format(len(pop_bulk))) try: Location.objects.bulk_create(pop_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') logger.info('Getting legal entity objects for {} rows...'.format(len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): recipient_name = row.get('awardee_or_recipient_legal', '') legal_entity = LegalEntity.objects.filter(recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name).first() if legal_entity is None: legal_entity = LegalEntity(recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name) legal_entity_value_map = { "location": lel_bulk[index - 1], } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=False) legal_entity_bulk.append(legal_entity) logger.info('Bulk creating {} legal entity rows...'.format(len(legal_entity_bulk))) try: LegalEntity.objects.bulk_create(legal_entity_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') awarding_agency_list = [] funding_agency_list = [] logger.info('Getting award objects for {} rows...'.format(len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1: awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id'] awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id] row['awarding_agency_code'] = awarding_cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1: funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"]) if funding_subtier_agency_id is not None: funding_toptier_agency_id = \ subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id'] funding_cgac_code = toptier_agency_map[funding_toptier_agency_id] else: funding_cgac_code = None row['funding_agency_code'] = funding_cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"] ) funding_agency = Agency.get_by_toptier_subtier( row['funding_agency_code'], row["funding_sub_tier_agency_co"] ) awarding_agency_list.append(awarding_agency) funding_agency_list.append(funding_agency) # award.save() is called in Award.get_or_create_summary_award by default created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, fain=row.get('fain'), uri=row.get('uri'), save=False ) award_bulk.append(award) award_update_id_list.append(award.id) logger.info('Bulk creating {} award rows...'.format(len(award_bulk))) try: Award.objects.bulk_create(award_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ') logger.info('Getting transaction_normalized for {} rows...'.format(len(award_financial_assistance_data))) for index, row in enumerate(award_financial_assistance_data, 1): parent_txn_value_map = { "award": award_bulk[index - 1], "awarding_agency": awarding_agency_list[index - 1], "funding_agency": funding_agency_list[index - 1], "recipient": legal_entity_bulk[index - 1], "place_of_performance": pop_bulk[index - 1], "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) transaction_normalized = TransactionNormalized.get_or_create_transaction(**transaction_dict) transaction_normalized.fiscal_year = fy(transaction_normalized.action_date) transaction_normalized_bulk.append(transaction_normalized) logger.info('Bulk creating {} TransactionNormalized rows...'.format(len(transaction_normalized_bulk))) try: TransactionNormalized.objects.bulk_create(transaction_normalized_bulk) except IntegrityError: logger.info('Tried and failed to insert duplicate transaction_normalized row. Continuing... ') for index, row in enumerate(award_financial_assistance_data, 1): financial_assistance_data = load_data_into_model( TransactionFABS(), # thrown away row, as_dict=True) transaction_assistance = TransactionFABS(transaction=transaction_normalized_bulk[index - 1], **financial_assistance_data) transaction_assistance_bulk.append(transaction_assistance) logger.info('Bulk creating TransactionFABS rows...') try: TransactionFABS.objects.bulk_create(transaction_assistance_bulk) except IntegrityError: logger.info('!!! DUPLICATES FOUND. Continuing... ')
def test_get_or_create_summary_award(): """Test award record lookup.""" sta1 = mommy.make(SubtierAgency, subtier_code='1234', name='Bureau of Effective Unit Tests') sta2 = mommy.make(SubtierAgency, subtier_code='5678', name='Bureau of Breaky Unit Tests') sta3 = mommy.make(SubtierAgency, subtier_code='0509', name='Bureau of Testing Bureaucracy') tta1 = mommy.make(ToptierAgency, cgac_code='020', name='Department of Unit Tests') tta2 = mommy.make(ToptierAgency, cgac_code='089', name='Department of Integrated Tests') a1 = mommy.make(Agency, id=1, toptier_agency=tta1, subtier_agency=sta1) a2 = mommy.make(Agency, id=2, toptier_agency=tta1) a3 = mommy.make(Agency, id=3, toptier_agency=tta1, subtier_agency=sta2) a4 = mommy.make(Agency, id=4, toptier_agency=tta2, subtier_agency=sta3) # match on awarding agency and piid m1 = mommy.make('awards.award', piid='DUT123', awarding_agency=a1) t1 = Award.get_or_create_summary_award(piid='DUT123', awarding_agency=a1)[1] assert t1 == m1 # match on awarding agency and piid + parent award pa1 = mommy.make('awards.award', piid='IDVDUT456') m2 = mommy.make('awards.award', piid='DUT456', parent_award=pa1, awarding_agency=a1) t2 = Award.get_or_create_summary_award(piid='DUT456', parent_award_id='IDVDUT456', awarding_agency=a1)[1] assert t2 == m2 # match on awarding agency and fain m3 = mommy.make('awards.award', fain='DUT789', awarding_agency=a1) t3 = Award.get_or_create_summary_award(fain='DUT789', awarding_agency=a1)[1] assert t3 == m3 # match on awarding agency and fain + uri (fain takes precedence, same uri) m4 = mommy.make('awards.award', fain='DUT987', uri='123-abc-456', awarding_agency=a1) t4 = Award.get_or_create_summary_award(fain='DUT987', uri='123-abc-456', awarding_agency=a1)[1] assert t4 == m4 # match on awarding agency and fain + uri (fain takes precedence, different uri) m5 = mommy.make('awards.award', fain='DUT123456', uri='123-abc-456', awarding_agency=a1) t5 = Award.get_or_create_summary_award(fain='DUT123456', uri='123-abc-456-a-different-uri', awarding_agency=a1)[1] assert t5 == m5 # match on awarding agency + uri m6 = mommy.make('awards.award', uri='abc-123-def', awarding_agency=a1) t6 = Award.get_or_create_summary_award(uri='abc-123-def', awarding_agency=a1)[1] assert t6 == m6 # match on awarding toptier agency only m7 = mommy.make('awards.award', uri='kkk-bbb-jjj', awarding_agency=a2) t7 = Award.get_or_create_summary_award(uri='kkk-bbb-jjj', awarding_agency=a2)[1] assert m7 == t7 # match on no awarding agency, uri m8 = mommy.make('awards.award', uri='mmm-fff-ddd') t8 = Award.get_or_create_summary_award(uri='mmm-fff-ddd')[1] assert m8 == t8 # match on no awarding agency, fain m9 = mommy.make('awards.award', fain='DUTDUTDUT') t9 = Award.get_or_create_summary_award(fain='DUTDUTDUT')[1] assert m9 == t9 # non-match with piid creates new award record m10 = mommy.make('awards.award', piid='imapiid') t10 = Award.get_or_create_summary_award(piid='imadifferentpiid') assert len(t10[0]) == 1 assert m10 != t10[1] # non-match with piid + non-matching parent award creates two new awards pa11 = mommy.make('awards.award', piid='momofpiidsarefun') m11 = mommy.make('awards.award', piid='piidsarefun', parent_award=pa11) t11 = Award.get_or_create_summary_award(piid='piidsarefun', parent_award_id='dadofpiidsarefun') assert len(t11[0]) == 2 assert m11 != t11[1] # non-match with piid + matching parent award creates one new award pa12 = mommy.make('awards.award', piid='thingmom') m12 = mommy.make('awards.award', piid='thing1', parent_award=pa12) t12 = Award.get_or_create_summary_award(piid='thing2', parent_award_id='thingmom') assert len(t12[0]) == 1 assert m12 != t12[1] # matching piid + non-matching parent pa13 = mommy.make('awards.award', piid='piidthing') m13 = mommy.make('awards.award', piid='0005', parent_award=pa13) t13 = Award.get_or_create_summary_award( piid='0005', parent_award_id='anotherpiidthing')[1] assert t13 != m13 # matching piid and parent award id but mismatched subtier agency, same top tier agency pa14 = mommy.make('awards.award', piid='imalittlepiid', awarding_agency=a1) m14 = mommy.make('awards.award', piid='shortandstout', parent_award=pa14, awarding_agency=a1) t14 = Award.get_or_create_summary_award(piid='shortandstout', parent_award_id='imalittlepiid', awarding_agency=a3)[1] assert t14 != m14 # matching fain but mismatched subtier agency, same top tier agency m15 = mommy.make('awards.award', fain='imalittlefain', parent_award=pa14, awarding_agency=a1) t15 = Award.get_or_create_summary_award(fain='imalittlefain', awarding_agency=a3)[1] assert t15 != m15 # matching piid and parent award but mismatched subtier and toptier agency pa16 = mommy.make('awards.Award', piid='imjustapiidparent', awarding_agency=a3) m16 = mommy.make('awards.Award', piid='imjustapiidchild', awarding_agency=a3) t16 = Award.get_or_create_summary_award( piid='imjustapiidchild', parent_award_id='imjustapiidparent', awarding_agency=a4)[1] assert t16 != m16
def load_file_d1(submission_attributes, procurement_data, db_cursor): """ Process and load file D1 broker data (contract award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name "city_name": "place_of_performance_locat", "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = {"place_of_performance_flag": True} legal_entity_location_value_map = {"recipient_flag": True} contract_field_map = { "type": "contract_award_type", "description": "award_description" } for row in procurement_data: legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map) # Create the legal entity if it doesn't exist try: legal_entity = LegalEntity.objects.get( recipient_unique_id=row['awardee_or_recipient_uniqu']) except ObjectDoesNotExist: legal_entity_value_map = { "location": legal_entity_location, "legal_entity_id": row['awardee_or_recipient_uniqu'], } legal_entity = load_data_into_model( LegalEntity(), row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map) # If awarding/funding toptier agency code (aka CGAC) is not supplied on the D1 record, # use the sub tier code to look it up if row['awarding_agency_code'] is None: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code if row['funding_agency_code'] is None: row['funding_agency_code'] = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_id=row.get('parent_award_id')) award.save() AWARD_UPDATE_ID_LIST.append(award.id) AWARD_CONTRACT_UPDATE_ID_LIST.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_instance = load_data_into_model( Transaction(), row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction_instance, created = Transaction.objects.get_or_create( **transaction_instance) contract_value_map = { 'transaction': transaction_instance, 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e']) } contract_instance = load_data_into_model(TransactionContract(), row, field_map=contract_field_map, value_map=contract_value_map, save=True)
def load_awards(self, fabs_broker_data, total_rows): start_time = datetime.now() for index, row in enumerate(fabs_broker_data, 1): if not (index % 10000): logger.info('Awards: Loading row {} of {} ({})'.format( str(index), str(total_rows), datetime.now() - start_time)) # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len( row['awarding_agency_code'].strip()) < 1: awarding_subtier_agency_id = self.subtier_agency_map[ row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = self.subtier_to_agency_map[ awarding_subtier_agency_id]['toptier_agency_id'] awarding_cgac_code = self.toptier_agency_map[ awarding_toptier_agency_id] row['awarding_agency_code'] = awarding_cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len( row['funding_agency_code'].strip()) < 1: funding_subtier_agency_id = self.subtier_agency_map.get( row["funding_sub_tier_agency_co"]) if funding_subtier_agency_id is not None: funding_toptier_agency_id = self.subtier_to_agency_map[ funding_subtier_agency_id]['toptier_agency_id'] funding_cgac_code = self.toptier_agency_map[ funding_toptier_agency_id] else: funding_cgac_code = None row['funding_agency_code'] = funding_cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = self.agency_no_sub_map.get( (row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])) if awarding_agency is None: awarding_agency = self.agency_sub_only_map.get( row['awarding_agency_code']) # If we still haven't found the agency, try surmising it from subtier if awarding_agency is None: awarding_agency = self.agency_subtier_map.get( row['awarding_sub_tier_agency_c']) funding_agency = self.agency_no_sub_map.get( (row['funding_agency_code'], row["funding_sub_tier_agency_co"])) if funding_agency is None: funding_agency = self.agency_sub_only_map.get( row['funding_agency_code']) if funding_agency is None: funding_agency = self.agency_subtier_map.get( row['funding_sub_tier_agency_co']) awarding_agency_list.append(awarding_agency) funding_agency_list.append(funding_agency) fain = row.get('fain') uri = row.get('uri') if awarding_agency: lookup_key = (fain, uri, awarding_agency.id) award = self.award_map.get(lookup_key) else: lookup_key = None award = None if not award: # create the award since it wasn't found create_kwargs = { 'awarding_agency': awarding_agency, 'fain': fain, 'uri': uri } award = Award(**create_kwargs) if awarding_agency and lookup_key: self.award_map[lookup_key] = award award_bulk.append(award) award_lookup.append(award) logger.info( 'Bulk creating Awards (batch_size: {})...'.format(BATCH_SIZE)) Award.objects.bulk_create(award_bulk, batch_size=BATCH_SIZE)
def insert_new_fabs(self, to_insert): place_of_performance_field_map = { "location_country_code": "place_of_perform_country_c", "country_name": "place_of_perform_country_n", "state_code": "place_of_perfor_state_code", "state_name": "place_of_perform_state_nam", "city_name": "place_of_performance_city", "county_name": "place_of_perform_county_na", "county_code": "place_of_perform_county_co", "foreign_location_description": "place_of_performance_forei", "zip_4a": "place_of_performance_zip4a", "congressional_code": "place_of_performance_congr", "performance_code": "place_of_performance_code", "zip_last4": "place_of_perform_zip_last4", "zip5": "place_of_performance_zip5", } legal_entity_location_field_map = { "location_country_code": "legal_entity_country_code", "country_name": "legal_entity_country_name", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "city_name": "legal_entity_city_name", "city_code": "legal_entity_city_code", "county_name": "legal_entity_county_name", "county_code": "legal_entity_county_code", "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "foreign_location_description": "legal_entity_foreign_descr", "congressional_code": "legal_entity_congressional", "zip_last4": "legal_entity_zip_last4", "zip5": "legal_entity_zip5", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "foreign_city_name": "legal_entity_foreign_city", } for row in to_insert: upper_case_dict_values(row) # Create new LegalEntityLocation and LegalEntity from the row data legal_entity_location = create_location( legal_entity_location_field_map, row, {"recipient_flag": True}) recipient_name = row['awardee_or_recipient_legal'] legal_entity = LegalEntity.objects.create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name if recipient_name is not None else "", parent_recipient_unique_id=row['ultimate_parent_unique_ide'], ) legal_entity_value_map = { "location": legal_entity_location, "business_categories": get_business_categories(row=row, data_type='fabs'), "business_types_description": row['business_types_desc'], } legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True}) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only( row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only( row["funding_sub_tier_agency_co"]) # Generate the unique Award ID # "ASST_AW_" + awarding_sub_tier_agency_c + fain + uri # this will raise an exception if the cast to an int fails, that's ok since we don't want to process # non-numeric record type values record_type_int = int(row['record_type']) if record_type_int == 1: uri = row['uri'] if row['uri'] else '-NONE-' fain = '-NONE-' elif record_type_int in (2, 3): uri = '-NONE-' fain = row['fain'] if row['fain'] else '-NONE-' else: msg = "Invalid record type encountered for the following afa_generated_unique record: {}" raise Exception(msg.format(row['afa_generated_unique'])) astac = row["awarding_sub_tier_agency_c"] if row[ "awarding_sub_tier_agency_c"] else "-NONE-" generated_unique_id = "ASST_AW_{}_{}_{}".format(astac, fain, uri) # Create the summary Award (created, award) = Award.get_or_create_summary_award( generated_unique_award_id=generated_unique_id, fain=row['fain'], uri=row['uri'], record_type=row['record_type'], ) award.save() # Append row to list of Awards updated AWARD_UPDATE_ID_LIST.append(award.id) try: last_mod_date = datetime.strptime(str( row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date() except ValueError: last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date() parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), "last_modified_date": last_mod_date, "type_description": row['assistance_type_desc'], "transaction_unique_id": row['afa_generated_unique'], "generated_unique_award_id": generated_unique_id, } fad_field_map = { "type": "assistance_type", "description": "award_description", "funding_amount": "total_funding_amount", } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True, ) financial_assistance_data = load_data_into_model( TransactionFABS(), row, as_dict=True) # thrown away afa_generated_unique = financial_assistance_data[ 'afa_generated_unique'] unique_fabs = TransactionFABS.objects.filter( afa_generated_unique=afa_generated_unique) if unique_fabs.first(): transaction_normalized_dict["update_date"] = datetime.now( timezone.utc) transaction_normalized_dict["fiscal_year"] = fy( transaction_normalized_dict["action_date"]) # Update TransactionNormalized TransactionNormalized.objects.filter( id=unique_fabs.first().transaction.id).update( **transaction_normalized_dict) # Update TransactionFABS unique_fabs.update(**financial_assistance_data) else: # Create TransactionNormalized transaction = TransactionNormalized( **transaction_normalized_dict) transaction.save() # Create TransactionFABS transaction_fabs = TransactionFABS(transaction=transaction, **financial_assistance_data) transaction_fabs.save() # Update legal entity to map back to transaction legal_entity.transaction_unique_id = afa_generated_unique legal_entity.save()
def test_get_or_create_summary_award(): """Test award record lookup.""" sta1 = mommy.make(SubtierAgency, subtier_code="1234", name="Bureau of Effective Unit Tests") sta2 = mommy.make(SubtierAgency, subtier_code="5678", name="Bureau of Breaky Unit Tests") sta3 = mommy.make(SubtierAgency, subtier_code="0509", name="Bureau of Testing Bureaucracy") tta1 = mommy.make(ToptierAgency, toptier_code="020", name="Department of Unit Tests") tta2 = mommy.make(ToptierAgency, toptier_code="089", name="Department of Integrated Tests") a1 = mommy.make(Agency, id=1, toptier_agency=tta1, subtier_agency=sta1) a2 = mommy.make(Agency, id=2, toptier_agency=tta1) a3 = mommy.make(Agency, id=3, toptier_agency=tta1, subtier_agency=sta2) a4 = mommy.make(Agency, id=4, toptier_agency=tta2, subtier_agency=sta3) # match on awarding agency and piid m1 = mommy.make("awards.award", piid="DUT123", awarding_agency=a1) t1 = Award.get_or_create_summary_award(piid="DUT123", awarding_agency=a1)[1] assert t1 == m1 # match on awarding agency and piid + parent award piid m2 = mommy.make("awards.award", piid="DUT456", parent_award_piid="IDVDUT456", awarding_agency=a1) t2 = Award.get_or_create_summary_award(piid="DUT456", parent_award_piid="IDVDUT456", awarding_agency=a1)[1] assert t2 == m2 # match on awarding agency and fain m3 = mommy.make("awards.award", fain="DUT789", awarding_agency=a1) t3 = Award.get_or_create_summary_award(fain="DUT789", awarding_agency=a1)[1] assert t3 == m3 # match on awarding agency and fain + uri (fain takes precedence, same uri) m4 = mommy.make("awards.award", fain="DUT987", awarding_agency=a1) t4 = Award.get_or_create_summary_award(fain="DUT987", uri="123-abc-456", record_type="2", awarding_agency=a1)[1] assert t4 == m4 # match on awarding agency + uri m5 = mommy.make("awards.award", uri="abc-123-def", awarding_agency=a1) t5 = Award.get_or_create_summary_award(uri="abc-123-def", record_type="1", awarding_agency=a1)[1] assert t5 == m5 # match on awarding toptier agency only m6 = mommy.make("awards.award", uri="kkk-bbb-jjj", awarding_agency=a2) t6 = Award.get_or_create_summary_award(uri="kkk-bbb-jjj", record_type="1", awarding_agency=a2)[1] assert m6 == t6 # match on no awarding agency, uri m7 = mommy.make("awards.award", uri="mmm-fff-ddd") t7 = Award.get_or_create_summary_award(uri="mmm-fff-ddd", record_type="1")[1] assert m7 == t7 # match on no awarding agency, fain m8 = mommy.make("awards.award", fain="DUTDUTDUT") t8 = Award.get_or_create_summary_award(fain="DUTDUTDUT", record_type="2")[1] assert m8 == t8 # non-match with piid creates new award record m9 = mommy.make("awards.award", piid="imapiid") t9 = Award.get_or_create_summary_award(piid="imadifferentpiid") assert len(t9[0]) == 1 assert m9 != t9[1] # non-match with piid + matching parent award piid creates one new award m10 = mommy.make("awards.award", piid="thing1", parent_award_piid="thingmom") t10 = Award.get_or_create_summary_award(piid="thing2", parent_award_piid="thingmom") assert len(t10[0]) == 1 assert m10 != t10[1] # matching piid + non-matching parent m11 = mommy.make("awards.award", piid="0005", parent_award_piid="piidthing") t11 = Award.get_or_create_summary_award( piid="0005", parent_award_piid="anotherpiidthing") assert m11 != t11[0] # matching piid and parent award id but mismatched subtier agency, same top tier agency m12 = mommy.make("awards.award", piid="shortandstout", parent_award_piid="imalittlepiid", awarding_agency=a1) t12 = Award.get_or_create_summary_award(piid="shortandstout", parent_award_piid="imalittlepiid", awarding_agency=a3)[1] assert t12 != m12 # matching fain but mismatched subtier agency, same top tier agency m13 = mommy.make("awards.award", fain="imalittlefain", awarding_agency=a1) t13 = Award.get_or_create_summary_award(fain="imalittlefain", record_type="2", awarding_agency=a3)[1] assert t13 != m13 # matching piid and parent award but mismatched subtier and toptier agency mommy.make("awards.Award", piid="imjustapiidparent", awarding_agency=a3) m14 = mommy.make("awards.Award", piid="imjustapiidchild", awarding_agency=a3) t14 = Award.get_or_create_summary_award( piid="imjustapiidchild", parent_award_piid="imjustapiidparent", awarding_agency=a4)[1] assert t14 != m14 # matching piid and parent award but mismatched subtier and toptier agency m15 = mommy.make("awards.Award", generated_unique_award_id="this_is_generated_and_unique") t15 = Award.get_or_create_summary_award( generated_unique_award_id="this_is_generated_and_unique") assert m15 == t15[1] # matching piid and parent award but mismatched subtier and toptier agency m16 = mommy.make("awards.Award", generated_unique_award_id="this_is_generated_and_unique") t16 = Award.get_or_create_summary_award( generated_unique_award_id="this_is_generated_unique_and_different") assert len(t16[0]) == 1 assert m16 != t16[1]
def insert_new_fpds(self, to_insert, total_rows): logger.info('Starting insertion of new FPDS data') place_of_performance_field_map = { "location_country_code": "place_of_perform_country_c", "country_name": "place_of_perf_country_desc", "state_code": "place_of_performance_state", "state_name": "place_of_perfor_state_desc", "city_name": "place_of_perform_city_name", "county_name": "place_of_perform_county_na", "county_code": "place_of_perform_county_co", "zip_4a": "place_of_performance_zip4a", "congressional_code": "place_of_performance_congr", "zip_last4": "place_of_perform_zip_last4", "zip5": "place_of_performance_zip5" } legal_entity_location_field_map = { "location_country_code": "legal_entity_country_code", "country_name": "legal_entity_country_name", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_descrip", "city_name": "legal_entity_city_name", "county_name": "legal_entity_county_name", "county_code": "legal_entity_county_code", "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "zip4": "legal_entity_zip4", "congressional_code": "legal_entity_congressional", "zip_last4": "legal_entity_zip_last4", "zip5": "legal_entity_zip5" } start_time = datetime.now() for index, row in enumerate(to_insert, 1): if not (index % 1000): logger.info( 'Inserting Stale FPDS: Inserting row {} of {} ({})'.format( str(index), str(total_rows), datetime.now() - start_time)) for key in row: if isinstance(row[key], str): row[key] = row[key].upper() # Create new LegalEntityLocation and LegalEntity from the row data legal_entity_location = create_location( legal_entity_location_field_map, row, { "recipient_flag": True, "is_fpds": True }) recipient_name = row['awardee_or_recipient_legal'] legal_entity = LegalEntity.objects.create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name if recipient_name is not None else "") legal_entity_value_map = { "location": legal_entity_location, "business_categories": get_business_categories(row=row, data_type='fpds'), "is_fpds": True } set_legal_entity_boolean_fields(row) legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True}) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only( row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only( row["funding_sub_tier_agency_co"]) # Generate the unique Award ID # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id generated_unique_id = 'CONT_AW_' + (row['agency_id'] if row['agency_id'] else '-NONE-') + '_' + \ (row['referenced_idv_agency_iden'] if row['referenced_idv_agency_iden'] else '-NONE-') + '_' + \ (row['piid'] if row['piid'] else '-NONE-') + '_' + \ (row['parent_award_id'] if row['parent_award_id'] else '-NONE-') # Create the summary Award (created, award) = Award.get_or_create_summary_award( generated_unique_award_id=generated_unique_id, piid=row['piid']) award.parent_award_piid = row.get('parent_award_id') award.save() # Append row to list of Awards updated award_update_id_list.append(award.id) try: last_mod_date = datetime.strptime(str( row['last_modified']), "%Y-%m-%d %H:%M:%S.%f").date() except ValueError: last_mod_date = datetime.strptime(str(row['last_modified']), "%Y-%m-%d %H:%M:%S").date() parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), "last_modified_date": last_mod_date, "transaction_unique_id": row['detached_award_proc_unique'], "generated_unique_award_id": generated_unique_id, "is_fpds": True } contract_field_map = { "type": "contract_award_type", "type_description": "contract_award_type_desc", "description": "award_description" } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) contract_instance = load_data_into_model( TransactionFPDS(), # thrown away row, as_dict=True) detached_award_proc_unique = contract_instance[ 'detached_award_proc_unique'] unique_fpds = TransactionFPDS.objects.filter( detached_award_proc_unique=detached_award_proc_unique) if unique_fpds.first(): transaction_normalized_dict["update_date"] = datetime.utcnow() transaction_normalized_dict["fiscal_year"] = fy( transaction_normalized_dict["action_date"]) # update TransactionNormalized TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).\ update(**transaction_normalized_dict) # update TransactionFPDS unique_fpds.update(**contract_instance) else: # create TransactionNormalized transaction = TransactionNormalized( **transaction_normalized_dict) transaction.save() # create TransactionFPDS transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance) transaction_fpds.save()
def test_get_or_create_summary_award(): """Test award record lookup.""" sta1 = mommy.make(SubtierAgency, subtier_code='1234', name='Bureau of Effective Unit Tests') sta2 = mommy.make(SubtierAgency, subtier_code='5678', name='Bureau of Breaky Unit Tests') sta3 = mommy.make(SubtierAgency, subtier_code='0509', name='Bureau of Testing Bureaucracy') tta1 = mommy.make(ToptierAgency, cgac_code='020', name='Department of Unit Tests') tta2 = mommy.make(ToptierAgency, cgac_code='089', name='Department of Integrated Tests') a1 = mommy.make(Agency, id=1, toptier_agency=tta1, subtier_agency=sta1) a2 = mommy.make(Agency, id=2, toptier_agency=tta1) a3 = mommy.make(Agency, id=3, toptier_agency=tta1, subtier_agency=sta2) a4 = mommy.make(Agency, id=4, toptier_agency=tta2, subtier_agency=sta3) # match on awarding agency and piid m1 = mommy.make('awards.award', piid='DUT123', awarding_agency=a1) t1 = Award.get_or_create_summary_award(piid='DUT123', awarding_agency=a1)[1] assert t1 == m1 # match on awarding agency and piid + parent award piid m2 = mommy.make('awards.award', piid='DUT456', parent_award_piid='IDVDUT456', awarding_agency=a1) t2 = Award.get_or_create_summary_award(piid='DUT456', parent_award_piid='IDVDUT456', awarding_agency=a1)[1] assert t2 == m2 # match on awarding agency and fain m3 = mommy.make('awards.award', fain='DUT789', awarding_agency=a1) t3 = Award.get_or_create_summary_award(fain='DUT789', awarding_agency=a1)[1] assert t3 == m3 # match on awarding agency and fain + uri (fain takes precedence, same uri) m4 = mommy.make('awards.award', fain='DUT987', awarding_agency=a1) t4 = Award.get_or_create_summary_award(fain='DUT987', uri='123-abc-456', record_type='2', awarding_agency=a1)[1] assert t4 == m4 # match on awarding agency + uri m5 = mommy.make('awards.award', uri='abc-123-def', awarding_agency=a1) t5 = Award.get_or_create_summary_award(uri='abc-123-def', record_type='1', awarding_agency=a1)[1] assert t5 == m5 # match on awarding toptier agency only m6 = mommy.make('awards.award', uri='kkk-bbb-jjj', awarding_agency=a2) t6 = Award.get_or_create_summary_award(uri='kkk-bbb-jjj', record_type='1', awarding_agency=a2)[1] assert m6 == t6 # match on no awarding agency, uri m7 = mommy.make('awards.award', uri='mmm-fff-ddd') t7 = Award.get_or_create_summary_award(uri='mmm-fff-ddd', record_type='1')[1] assert m7 == t7 # match on no awarding agency, fain m8 = mommy.make('awards.award', fain='DUTDUTDUT') t8 = Award.get_or_create_summary_award(fain='DUTDUTDUT', record_type='2')[1] assert m8 == t8 # non-match with piid creates new award record m9 = mommy.make('awards.award', piid='imapiid') t9 = Award.get_or_create_summary_award(piid='imadifferentpiid') assert len(t9[0]) == 1 assert m9 != t9[1] # non-match with piid + matching parent award piid creates one new award m10 = mommy.make('awards.award', piid='thing1', parent_award_piid='thingmom') t10 = Award.get_or_create_summary_award(piid='thing2', parent_award_piid='thingmom') assert len(t10[0]) == 1 assert m10 != t10[1] # matching piid + non-matching parent m11 = mommy.make('awards.award', piid='0005', parent_award_piid='piidthing') t11 = Award.get_or_create_summary_award(piid='0005', parent_award_piid='anotherpiidthing') assert m11 != t11[0] # matching piid and parent award id but mismatched subtier agency, same top tier agency m12 = mommy.make('awards.award', piid='shortandstout', parent_award_piid='imalittlepiid', awarding_agency=a1) t12 = Award.get_or_create_summary_award(piid='shortandstout', parent_award_piid='imalittlepiid', awarding_agency=a3)[1] assert t12 != m12 # matching fain but mismatched subtier agency, same top tier agency m13 = mommy.make('awards.award', fain='imalittlefain', awarding_agency=a1) t13 = Award.get_or_create_summary_award(fain='imalittlefain', record_type='2', awarding_agency=a3)[1] assert t13 != m13 # matching piid and parent award but mismatched subtier and toptier agency mommy.make('awards.Award', piid='imjustapiidparent', awarding_agency=a3) m14 = mommy.make('awards.Award', piid='imjustapiidchild', awarding_agency=a3) t14 = Award.get_or_create_summary_award(piid='imjustapiidchild', parent_award_piid='imjustapiidparent', awarding_agency=a4)[1] assert t14 != m14 # matching piid and parent award but mismatched subtier and toptier agency m15 = mommy.make('awards.Award', generated_unique_award_id='this_is_generated_and_unique') t15 = Award.get_or_create_summary_award(generated_unique_award_id='this_is_generated_and_unique') assert m15 == t15[1] # matching piid and parent award but mismatched subtier and toptier agency m16 = mommy.make('awards.Award', generated_unique_award_id='this_is_generated_and_unique') t16 = Award.get_or_create_summary_award(generated_unique_award_id='this_is_generated_unique_and_different') assert len(t16[0]) == 1 assert m16 != t16[1]