def test_get_by_subtier(): """Test Agency lookup by subtier.""" toptier = mommy.make("references.ToptierAgency", toptier_code="xyz", name="yo") subtier = mommy.make("references.SubtierAgency", subtier_code="abc", name="hi") mommy.make( "references.Agency", toptier_agency=toptier, subtier_agency=mommy.make("references.SubtierAgency", subtier_code="bbb"), ) agency1 = mommy.make("references.Agency", toptier_agency=toptier, subtier_agency=subtier) # lookup should return agency w/ most recent updatea_date that # matches the subtier code assert Agency.get_by_subtier("abc") == agency1 # if there's no match, we should get none assert Agency.get_by_subtier("nope") is None # if called with an empty argument, we should get None assert Agency.get_by_subtier("") is None assert Agency.get_by_subtier(None) is None
def get_valid_awarding_agency(row): agency = None agency_subtier_code = row['contracting_office_aid'] valid_subtier_code = agency_subtier_code and len(agency_subtier_code) > 0 # Get the awarding agency if valid_subtier_code: agency = Agency.get_by_subtier(agency_subtier_code) return agency
def test_get_by_subtier(): """Test Agency lookup by subtier.""" toptier = mommy.make('references.ToptierAgency', cgac_code='xyz', name='yo') subtier = mommy.make('references.SubtierAgency', subtier_code='abc', name='hi') mommy.make( 'references.Agency', toptier_agency=toptier, subtier_agency=subtier) mommy.make( 'references.Agency', toptier_agency=toptier, subtier_agency=mommy.make('references.SubtierAgency', subtier_code='bbb')) agency1 = mommy.make( 'references.Agency', toptier_agency=toptier, subtier_agency=subtier) # lookup should return agency w/ most recent updatea_date that # matches the subtier code assert Agency.get_by_subtier('abc') == agency1 # if there's no match, we should get none assert Agency.get_by_subtier('nope') is None # if called with an empty argument, we should get None assert Agency.get_by_subtier('') is None assert Agency.get_by_subtier(None) is None
def get_valid_awarding_agency(row): agency_subtier_code = row['awarding_sub_tier_agency_c'] agency_toptier_code = row['awarding_agency_code'] valid_subtier_code = (agency_subtier_code and len(agency_subtier_code) > 0) valid_toptier_code = (agency_toptier_code and len(agency_toptier_code) > 0) if not valid_toptier_code and not valid_subtier_code: return None agency = None # Get the awarding agency if valid_subtier_code and valid_toptier_code: agency = Agency.get_by_toptier_subtier(row['awarding_agency_code'], row['awarding_sub_tier_agency_c']) if not agency and valid_subtier_code: agency = Agency.get_by_subtier(row['awarding_sub_tier_agency_c']) if not agency and valid_toptier_code: agency = Agency.get_by_toptier(row['awarding_agency_code']) return agency
def get_valid_awarding_agency(row): agency_subtier_code = row['awarding_sub_tier_agency_c'] agency_toptier_code = row['awarding_agency_code'] valid_subtier_code = (agency_subtier_code and len(agency_subtier_code) > 0) valid_toptier_code = (agency_toptier_code and len(agency_toptier_code) > 0) if not valid_toptier_code and not valid_subtier_code: return None agency = None # Get the awarding agency if valid_subtier_code and valid_toptier_code: agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row['awarding_sub_tier_agency_c']) if not agency and valid_subtier_code: agency = Agency.get_by_subtier(row['awarding_sub_tier_agency_c']) if not agency and valid_toptier_code: agency = Agency.get_by_toptier(row['awarding_agency_code']) return agency
def load_file_d2(submission_attributes, award_financial_assistance_data, db_cursor): """ Process and load file D2 broker data (financial assistance award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "city_code": "legal_entity_city_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "county_code": "legal_entity_county_code", "county_name": "legal_entity_county_name", "foreign_city_name": "legal_entity_foreign_city", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "zip5": "legal_entity_zip5", "zip_last4": "legal_entity_zip_last4", "location_country_code": "legal_entity_country_code" } place_of_performance_field_map = { "city_name": "place_of_performance_city", "performance_code": "place_of_performance_code", "congressional_code": "place_of_performance_congr", "county_name": "place_of_perform_county_na", "foreign_location_description": "place_of_performance_forei", "state_name": "place_of_perform_state_nam", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } legal_entity_location_value_map = {"recipient_flag": True} place_of_performance_value_map = {"place_of_performance_flag": True} fad_field_map = { "type": "assistance_type", "description": "award_description", } for row in award_financial_assistance_data: legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map) # Create the legal entity if it doesn't exist try: legal_entity = LegalEntity.objects.get( recipient_unique_id=row['awardee_or_recipient_uniqu']) except ObjectDoesNotExist: legal_entity_value_map = { "location": legal_entity_location, "legal_entity_id": row['awardee_or_recipient_uniqu'] } legal_entity = load_data_into_model( LegalEntity(), row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map) # If toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up if row['awarding_agency_code'] is None: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code if row['funding_agency_code'] is None: row['funding_agency_code'] = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_id=row.get('parent_award_id')) award.save() AWARD_UPDATE_ID_LIST.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_instance = load_data_into_model( Transaction(), row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) transaction_instance, created = Transaction.objects.get_or_create( **transaction_instance) fad_value_map = { "transaction": transaction_instance, "submission": submission_attributes, "cfda": CFDAProgram.objects.filter( program_number=row['cfda_number']).first(), 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), } financial_assistance_data = load_data_into_model( TransactionAssistance(), row, field_map=fad_field_map, value_map=fad_value_map, save=True)
def load_file_d1(submission_attributes, procurement_data, db_cursor): """ Process and load file D1 broker data (contract award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name "city_name": "place_of_performance_locat", "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = {"place_of_performance_flag": True} legal_entity_location_value_map = {"recipient_flag": True} contract_field_map = { "type": "contract_award_type", "description": "award_description" } for row in procurement_data: legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map) # Create the legal entity if it doesn't exist try: legal_entity = LegalEntity.objects.get( recipient_unique_id=row['awardee_or_recipient_uniqu']) except ObjectDoesNotExist: legal_entity_value_map = { "location": legal_entity_location, "legal_entity_id": row['awardee_or_recipient_uniqu'], } legal_entity = load_data_into_model( LegalEntity(), row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map) # If awarding/funding toptier agency code (aka CGAC) is not supplied on the D1 record, # use the sub tier code to look it up if row['awarding_agency_code'] is None: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code if row['funding_agency_code'] is None: row['funding_agency_code'] = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_id=row.get('parent_award_id')) award.save() AWARD_UPDATE_ID_LIST.append(award.id) AWARD_CONTRACT_UPDATE_ID_LIST.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_instance = load_data_into_model( Transaction(), row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction_instance, created = Transaction.objects.get_or_create( **transaction_instance) contract_value_map = { 'transaction': transaction_instance, 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e']) } contract_instance = load_data_into_model(TransactionContract(), row, field_map=contract_field_map, value_map=contract_value_map, save=True)
def load_file_d2(submission_attributes, award_financial_assistance_data, db_cursor, quick, row_preprocessor=no_preprocessing): """ Process and load file D2 broker data (financial assistance award txns). """ d_start_time = time.time() if quick: setup_broker_fdw() parameters = { 'broker_submission_id': submission_attributes.broker_submission_id } run_sql_file('usaspending_api/etl/management/load_file_d2.sql', parameters) logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time)) return legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "city_code": "legal_entity_city_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "county_code": "legal_entity_county_code", "county_name": "legal_entity_county_name", "foreign_city_name": "legal_entity_foreign_city", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "zip5": "legal_entity_zip5", "zip_last4": "legal_entity_zip_last4", "location_country_code": "legal_entity_country_code" } place_of_performance_field_map = { "city_name": "place_of_performance_city", "performance_code": "place_of_performance_code", "congressional_code": "place_of_performance_congr", "county_name": "place_of_perform_county_na", "foreign_location_description": "place_of_performance_forei", "state_name": "place_of_perform_state_nam", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } legal_entity_location_value_map = {"recipient_flag": True} place_of_performance_value_map = {"place_of_performance_flag": True} fad_field_map = { "type": "assistance_type", "description": "award_description", } total_rows = len(award_financial_assistance_data) start_time = datetime.now() for index, row in enumerate(award_financial_assistance_data, 1): if not (index % 100): logger.info('D2 File Load: Loading row {} of {} ({})'.format( str(index), str(total_rows), datetime.now() - start_time)) row = row_preprocessor(row) legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map) # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len( row['awarding_agency_code'].strip()) < 1: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len( row['funding_agency_code'].strip()) < 1: funding_agency = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]) row['funding_agency_code'] = ( funding_agency.toptier_agency.cgac_code if funding_agency is not None else None) # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri')) award.save() award_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction( **transaction_dict) transaction.save() fad_value_map = { "submission": submission_attributes, "cfda": Cfda.objects.filter(program_number=row['cfda_number']).first(), 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), } financial_assistance_data = load_data_into_model( TransactionFABS(), # thrown away row, field_map=fad_field_map, value_map=fad_value_map, as_dict=True) transaction_assistance = TransactionFABS.get_or_create_2( transaction=transaction, **financial_assistance_data) transaction_assistance.save() logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time))
def load_file_d1(submission_attributes, procurement_data, db_cursor, quick=False): """ Process and load file D1 broker data (contract award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name "city_name": "place_of_performance_locat", "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = {"place_of_performance_flag": True} legal_entity_location_value_map = {"recipient_flag": True} contract_field_map = { "type": "contract_award_type", "description": "award_description" } d_start_time = time.time() if quick: parameters = { 'broker_submission_id': submission_attributes.broker_submission_id } run_sql_file('usaspending_api/etl/management/load_file_d1.sql', parameters) logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time)) return total_rows = len(procurement_data) start_time = datetime.now() for index, row in enumerate(procurement_data, 1): if not (index % 100): logger.info('D1 File Load: Loading row {} of {} ({})'.format( str(index), str(total_rows), datetime.now() - start_time)) legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, copy(legal_entity_location_value_map)) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, copy(place_of_performance_value_map)) # If awarding toptier agency code (aka CGAC) is not supplied on the D1 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len( row['awarding_agency_code'].strip()) < 1: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len( row['funding_agency_code'].strip()) < 1: funding_agency = Agency.get_by_subtier( row["funding_sub_tier_agency_co"]) row['funding_agency_code'] = ( funding_agency.toptier_agency.cgac_code if funding_agency is not None else None) # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"]) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_piid=row.get( 'parent_award_id')) # It is a FAIN/PIID/URI, not our db's pk award.save() award_update_id_list.append(award.id) award_contract_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction( **transaction_dict) transaction.save() contract_value_map = { 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e']) } contract_instance = load_data_into_model( TransactionFPDS(), # thrown away row, field_map=contract_field_map, value_map=contract_value_map, as_dict=True) transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance) transaction_contract.save() logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))
def load_file_d2( submission_attributes, award_financial_assistance_data, db_cursor, quick, row_preprocessor=no_preprocessing ): """ Process and load file D2 broker data (financial assistance award txns). """ d_start_time = time.time() if quick: setup_broker_fdw() parameters = {'broker_submission_id': submission_attributes.broker_submission_id} run_sql_file('usaspending_api/etl/management/load_file_d2.sql', parameters) logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time)) return legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "city_code": "legal_entity_city_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "county_code": "legal_entity_county_code", "county_name": "legal_entity_county_name", "foreign_city_name": "legal_entity_foreign_city", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "zip5": "legal_entity_zip5", "zip_last4": "legal_entity_zip_last4", "location_country_code": "legal_entity_country_code" } place_of_performance_field_map = { "city_name": "place_of_performance_city", "performance_code": "place_of_performance_code", "congressional_code": "place_of_performance_congr", "county_name": "place_of_perform_county_na", "foreign_location_description": "place_of_performance_forei", "state_name": "place_of_perform_state_nam", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } legal_entity_location_value_map = { "recipient_flag": True } place_of_performance_value_map = { "place_of_performance_flag": True } fad_field_map = { "type": "assistance_type", "description": "award_description", } total_rows = len(award_financial_assistance_data) start_time = datetime.now() for index, row in enumerate(award_financial_assistance_data, 1): if not (index % 100): logger.info('D2 File Load: Loading row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) row = row_preprocessor(row) legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, legal_entity_location_value_map ) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name ) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, place_of_performance_value_map ) # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1: funding_agency = Agency.get_by_subtier(row["funding_sub_tier_agency_co"]) row['funding_agency_code'] = ( funding_agency.toptier_agency.cgac_code if funding_agency is not None else None) # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"] ) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri')) award.save() award_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict) transaction.save() fad_value_map = { "submission": submission_attributes, "cfda": Cfda.objects.filter(program_number=row['cfda_number']).first(), 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), } financial_assistance_data = load_data_into_model( TransactionFABS(), # thrown away row, field_map=fad_field_map, value_map=fad_value_map, as_dict=True) transaction_assistance = TransactionFABS.get_or_create_2(transaction=transaction, **financial_assistance_data) transaction_assistance.save() logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time))
def load_file_d1(submission_attributes, procurement_data, db_cursor, quick=False): """ Process and load file D1 broker data (contract award txns). """ legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name "city_name": "place_of_performance_locat", "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = { "place_of_performance_flag": True } legal_entity_location_value_map = { "recipient_flag": True } contract_field_map = { "type": "contract_award_type", "description": "award_description" } d_start_time = time.time() if quick: parameters = {'broker_submission_id': submission_attributes.broker_submission_id} run_sql_file('usaspending_api/etl/management/load_file_d1.sql', parameters) logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time)) return total_rows = len(procurement_data) start_time = datetime.now() for index, row in enumerate(procurement_data, 1): if not (index % 100): logger.info('D1 File Load: Loading row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, copy(legal_entity_location_value_map) ) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name ) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, copy(place_of_performance_value_map)) # If awarding toptier agency code (aka CGAC) is not supplied on the D1 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1: row['awarding_agency_code'] = Agency.get_by_subtier( row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1: funding_agency = Agency.get_by_subtier(row["funding_sub_tier_agency_co"]) row['funding_agency_code'] = ( funding_agency.toptier_agency.cgac_code if funding_agency is not None else None) # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"] ) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_piid=row.get('parent_award_id')) # It is a FAIN/PIID/URI, not our db's pk award.save() award_update_id_list.append(award.id) award_contract_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, 'submission': submission_attributes, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict) transaction.save() contract_value_map = { 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e']) } contract_instance = load_data_into_model( TransactionFPDS(), # thrown away row, field_map=contract_field_map, value_map=contract_value_map, as_dict=True) transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance) transaction_contract.save() logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))