def handle(self, *args, **options):
    """
    Load treasury appropriation accounts from the file in ``options['file'][0]``.

    A ThreadedDataLoader is configured for TreasuryAppropriationAccount with
    ``treasury_account_identifier`` as the collision field, ``update`` as the
    collision behavior and ``self.skip_and_remove_financing_tas`` as the
    pre-row function. After the load, the three federal account helpers run.
    """

    def stripped(column):
        # Row reader: the named column with surrounding whitespace removed
        return lambda row: row[column].strip()

    def toptier_for(column):
        # Row reader: first ToptierAgency (ordered by fpds_code) whose
        # cgac_code equals the trimmed value of the named column
        def lookup(row):
            matches = ToptierAgency.objects.filter(cgac_code=row[column].strip())
            return matches.order_by("fpds_code").first()

        return lookup

    # Handed to the loader as field_map (keys appear to be model fields,
    # values source column headers -- confirm against ThreadedDataLoader)
    columns_by_field = {
        "treasury_account_identifier": "ACCT_NUM",
        "account_title": "GWA_TAS NAME",
        "reporting_agency_id": "Agency AID",
        "reporting_agency_name": "Agency Name",
        "budget_bureau_code": "ADMIN_ORG",
        "budget_bureau_name": "Admin Org Name",
        "fr_entity_code": "FR Entity Type Code",
        "fr_entity_description": "FR Entity Description",
        "budget_function_code": "Function Code",
        "budget_function_title": "Function Description",
        "budget_subfunction_code": "Sub Function Code",
        "budget_subfunction_title": "Sub Function Description",
    }

    # Handed to the loader as value_map: a constant or a callable taking a row
    values_by_field = {
        "data_source": "USA",
        "tas_rendering_label": self.generate_tas_rendering_label,
        "allocation_transfer_agency_id": stripped("ATA"),
        "agency_id": stripped("AID"),
        "beginning_period_of_availability": stripped("BPOA"),
        "ending_period_of_availability": stripped("EPOA"),
        "availability_type_code": stripped("A"),
        "main_account_code": stripped("MAIN"),
        "sub_account_code": stripped("SUB"),
        "awarding_toptier_agency": toptier_for("ATA"),
        "funding_toptier_agency": toptier_for("AID"),
    }

    source_file = options['file'][0]
    tas_loader = ThreadedDataLoader(
        model_class=TreasuryAppropriationAccount,
        field_map=columns_by_field,
        value_map=values_by_field,
        collision_field='treasury_account_identifier',
        collision_behavior='update',
        pre_row_function=self.skip_and_remove_financing_tas,
    )
    tas_loader.load_from_file(source_file)

    # update TAS fk relationships to federal accounts
    remove_empty_federal_accounts()
    update_federal_accounts()
    insert_federal_accounts()
def handle(self, *args, **options):
    """
    Load reference data for one supported model from a file.

    Reads the model name, file path and encoding from the first element of
    ``options['model']``, ``options['path']`` and ``options['encoding']``.
    The rows are loaded through a ThreadedDataLoader with collision
    behavior ``update``.

    If the model name is not supported, an error is logged and the command
    returns without loading anything. Previously the code logged the error
    and then failed with a KeyError on the model lookup.
    """
    possible_models = {
        "RefCityCountyCode": RefCityCountyCode,
        "RefCountryCode": RefCountryCode,
        "ObjectClass": ObjectClass,
        "RefProgramActivity": RefProgramActivity,
    }

    model = options['model'][0]
    path = options['path'][0]
    encoding = options['encoding'][0]

    # Stop here for unknown models instead of falling through to the lookup
    if model not in possible_models:
        logger.error("Model " + model + " is not supported")
        return

    loader = ThreadedDataLoader(model_class=possible_models[model], collision_behavior='update')
    loader.load_from_file(path, encoding)
def handle(self, *args, **options):
    """
    Load reference data for one supported model from a file.

    Reads the model name, file path and encoding from the first element of
    ``options["model"]``, ``options["path"]`` and ``options["encoding"]``.
    The rows are loaded through a ThreadedDataLoader with collision
    behavior ``update``.

    If the model name is not supported, an error is logged and the command
    returns without loading anything. Previously the code logged the error
    and then failed with a KeyError on the model lookup.
    """
    possible_models = {
        "RefCountryCode": RefCountryCode,
        "ObjectClass": ObjectClass,
        "RefProgramActivity": RefProgramActivity,
    }

    model = options["model"][0]
    path = options["path"][0]
    encoding = options["encoding"][0]

    # Stop here for unknown models instead of falling through to the lookup
    if model not in possible_models:
        logger.error("Model " + model + " is not supported")
        return

    loader = ThreadedDataLoader(model_class=possible_models[model], collision_behavior="update")
    loader.load_from_file(path, encoding)
def handle(self, *args, **options):
    """
    Load Procurement records from the file in ``options['file'][0]``.

    A ThreadedDataLoader is built for Procurement with the field and value
    maps defined below and ``self.post_row_process_function`` as the
    post-row function.
    """

    def award_for(row):
        # get_or_create returns (object, created); only the object is wanted
        award, _created = Award.objects.get_or_create(piid=row['piid'], type='C')
        return award

    def recipient_for(row):
        entity, _created = LegalEntity.objects.get_or_create(recipient_name=row['dunsnumber'])
        return entity

    def awarding_agency_for(row):
        subtier_code = self.get_agency_code(row['maj_agency_cat'])
        return Agency.objects.get(subtier_code=subtier_code)

    def converted_date(column):
        # Row reader: the named column passed through self.convert_date
        return lambda row: self.convert_date(row[column])

    def gfe_gfp_prefix(row):
        # Keep only the text before the first ":"
        return row['gfe_gfp'].split(":")[0]

    columns_by_field = {
        "federal_action_obligation": "dollarsobligated",
        "description": "descriptionofcontractrequirement",
        "modification_number": "modnumber",
    }

    values_by_field = {
        "data_source": "USA",
        "award": award_for,
        "recipient": recipient_for,
        "awarding_agency": awarding_agency_for,
        "action_date": converted_date('signeddate'),
        "last_modified_date": converted_date('last_modified_date'),
        "gfe_gfp": gfe_gfp_prefix,
        # Queried once here, while the map is built, and passed as a constant
        # Probably want to change this?
        "submission": SubmissionAttributes.objects.all().first(),
    }

    source_file = options['file'][0]
    procurement_loader = ThreadedDataLoader(
        Procurement,
        field_map=columns_by_field,
        value_map=values_by_field,
        post_row_function=self.post_row_process_function,
    )
    procurement_loader.load_from_file(source_file)
def handle(self, *args, **options):
    """
    Load treasury appropriation accounts from the file in ``options['file'][0]``.

    The ThreadedDataLoader is configured for TreasuryAppropriationAccount
    with ``treasury_account_identifier`` as the collision field and
    ``update`` as the collision behavior.
    """

    def trimmed(column):
        # Row reader: the named column with surrounding whitespace removed
        def read(row):
            return row[column].strip()

        return read

    # Passed to the loader as field_map
    header_for = {
        "treasury_account_identifier": "ACCT_NUM",
        "account_title": "GWA_TAS NAME",
        "reporting_agency_id": "Agency AID",
        "reporting_agency_name": "Agency Name",
        "budget_bureau_code": "ADMIN_ORG",
        "budget_bureau_name": "Admin Org Name",
        "fr_entity_code": "FR Entity Type Code",
        "fr_entity_description": "FR Entity Description",
        "budget_function_code": "Function Code",
        "budget_function_title": "Function Description",
        "budget_subfunction_code": "Sub Function Code",
        "budget_subfunction_title": "Sub Function Description",
    }

    # Passed to the loader as value_map: a constant or a callable taking a row
    value_for = {
        "data_source": "USA",
        "tas_rendering_label": self.generate_tas_rendering_label,
        "allocation_transfer_agency_id": trimmed("ATA"),
        "agency_id": trimmed("AID"),
        "beginning_period_of_availability": trimmed("BPOA"),
        "ending_period_of_availability": trimmed("EPOA"),
        "availability_type_code": trimmed("A"),
        "main_account_code": trimmed("MAIN"),
        "sub_account_code": trimmed("SUB"),
    }

    input_file = options['file'][0]
    tas_loader = ThreadedDataLoader(
        model_class=TreasuryAppropriationAccount,
        field_map=header_for,
        value_map=value_for,
        collision_field='treasury_account_identifier',
        collision_behavior='update',
    )
    tas_loader.load_from_file(input_file)
def handle(self, *args, **options):
    """
    Load treasury appropriation accounts from a local file or an S3 bucket.

    ``options['location'][0]`` is treated as a bucket name when it contains
    no ``.``; in that case the ``cars_tas.csv`` key is fetched from the
    bucket in the region named by ``USASPENDING_AWS_REGION``. Otherwise the
    location is used directly as the file path.

    After the load, every TAS with no funding toptier agency is retried by
    matching its ``fr_entity_code`` against ``ToptierAgency.cgac_code``.
    Finally the three federal account helpers run.
    """
    location = options['location'][0]

    # No "." anywhere in the location means it is handled as an S3 bucket name
    is_remote_file = '.' not in location
    if not is_remote_file:
        file_path = location
    else:
        region = os.environ.get('USASPENDING_AWS_REGION')
        bucket = boto.s3.connect_to_region(region).lookup(location)
        file_path = bucket.get_key('cars_tas.csv')

    def stripped(column):
        # Row reader: the named column with surrounding whitespace removed
        return lambda row: row[column].strip()

    def toptier_for(column):
        # Row reader: first ToptierAgency (ordered by fpds_code) whose
        # cgac_code equals the trimmed value of the named column
        def lookup(row):
            matches = ToptierAgency.objects.filter(cgac_code=row[column].strip())
            return matches.order_by("fpds_code").first()

        return lookup

    columns_by_field = {
        "treasury_account_identifier": "ACCT_NUM",
        "account_title": "GWA_TAS_NAME",
        "reporting_agency_id": "Agency AID",
        "reporting_agency_name": "Agency Name",
        "budget_bureau_code": "ADMIN_ORG",
        "budget_bureau_name": "Admin Org Name",
        "fr_entity_code": "FR Entity Type",
        "fr_entity_description": "FR Entity Description",
        "budget_function_code": "Function Code",
        "budget_function_title": "Function Description",
        "budget_subfunction_code": "Sub Function Code",
        "budget_subfunction_title": "Sub Function Description",
    }

    values_by_field = {
        "data_source": "USA",
        "tas_rendering_label": self.generate_tas_rendering_label,
        "allocation_transfer_agency_id": stripped("ATA"),
        "agency_id": stripped("AID"),
        "beginning_period_of_availability": stripped("BPOA"),
        "ending_period_of_availability": stripped("EPOA"),
        "availability_type_code": stripped("A"),
        "main_account_code": stripped("MAIN"),
        "sub_account_code": stripped("SUB"),
        "awarding_toptier_agency": toptier_for("ATA"),
        "funding_toptier_agency": toptier_for("AID"),
    }

    tas_loader = ThreadedDataLoader(
        model_class=TreasuryAppropriationAccount,
        field_map=columns_by_field,
        value_map=values_by_field,
        collision_field='treasury_account_identifier',
        collision_behavior='update',
        pre_row_function=self.skip_and_remove_financing_tas,
    )
    tas_loader.load_from_file(filepath=file_path, remote_file=is_remote_file)

    # Match funding toptiers by FREC if they didn't match by AID
    unmapped_funding_agencies = TreasuryAppropriationAccount.objects.filter(funding_toptier_agency=None)
    unmatched_total = unmapped_funding_agencies.count()
    self.logger.info(
        'Found {} unmatched funding agencies across all TAS objects. '
        'Attempting to match on FREC.'.format(unmatched_total))

    match_count = 0
    for tas in unmapped_funding_agencies:
        # CGAC code is a combination of FRECs and CGACs. It will never be empty and it will always
        # be unique in ToptierAgencies; this should be safe to do.
        frec_match = ToptierAgency.objects.filter(cgac_code=tas.fr_entity_code).first()
        if not frec_match:
            continue

        match_count += 1
        self.logger.info(
            'Matched unknown funding agency for TAS {} with FREC {}'.format(
                tas.tas_rendering_label, tas.fr_entity_code))
        tas.funding_toptier_agency = frec_match
        tas.save()

    self.logger.info('Updated {} funding toptiers with a FREC agency.'.format(match_count))

    # update TAS fk relationships to federal accounts
    remove_empty_federal_accounts()
    update_federal_accounts()
    insert_federal_accounts()
def test_threaded_data_loader():
    """
    Exercise ThreadedDataLoader against each collision behavior in turn:
    update, delete, skip and skip_and_complain.
    """

    def reload_record():
        # Re-read the record under test from the database
        return TreasuryAppropriationAccount.objects.get(treasury_account_identifier="53021")

    # Only two fields are mapped; the rest are irrelevant to this test
    loader = ThreadedDataLoader(
        model_class=TreasuryAppropriationAccount,
        field_map={"treasury_account_identifier": "ACCT_NUM", "account_title": "GWA_TAS_NAME"},
        collision_field="treasury_account_identifier",
        collision_behavior="update",
    )

    # Two trimmed-down copies of tas_list.csv and the title each one carries
    testing_data_dir = settings.APP_DIR / "data" / "testing_data"
    file_path_1 = str(testing_data_dir / "tas_list_1.csv")
    file_path_2 = str(testing_data_dir / "tas_list_2.csv")
    file_1_account_title = "Compensation of Members and Related Administrative Expenses, Senat"
    file_2_account_title = "Update Test Name"

    # Initial load
    loader.load_from_file(file_path_1)
    record = reload_record()
    assert record.account_title == file_1_account_title

    # "update": set a field by hand, then load file 2. The title should
    # change while the hand-set value stays on the same record.
    record.beginning_period_of_availability = 2004
    record.save()
    loader.load_from_file(file_path_2)
    record = reload_record()
    assert record.account_title == file_2_account_title
    assert record.beginning_period_of_availability == "2004"

    # "delete": loading file 1 should leave a record without the hand-set value
    loader.collision_behavior = "delete"
    loader.load_from_file(file_path_1)
    record = reload_record()
    assert record.beginning_period_of_availability is None
    assert record.account_title == file_1_account_title

    # "skip" and "skip_and_complain": loading file 2 must leave the title alone
    for behavior in ("skip", "skip_and_complain"):
        loader.collision_behavior = behavior
        loader.load_from_file(file_path_2)
        record = reload_record()
        assert record.account_title == file_1_account_title
def test_threaded_data_loader(self):
    """
    Exercise ThreadedDataLoader against each collision behavior in turn:
    update, delete, skip and skip_and_complain.
    """

    def reload_record():
        # Re-read the record under test from the database
        return TreasuryAppropriationAccount.objects.get(gwa_tas='110100')

    # Only a few fields are mapped; the rest are irrelevant to this test
    loader = ThreadedDataLoader(
        model_class=TreasuryAppropriationAccount,
        field_map={
            "treasury_account_identifier": "ACCT_NUM",
            "gwa_tas": "GWA_TAS",
            "gwa_tas_name": "GWA_TAS NAME",
        },
        collision_field='treasury_account_identifier',
        collision_behavior='update',
    )

    # Two trimmed-down copies of tas_list.csv and the name each one carries
    file_path_1 = os.path.join(settings.BASE_DIR, 'usaspending_api/data/testing_data/tas_list_1.csv')
    file_path_2 = os.path.join(settings.BASE_DIR, 'usaspending_api/data/testing_data/tas_list_2.csv')
    file_1_gwa_tas_name = "Compensation of Members and Related Administrative Expenses, Senat"
    file_2_gwa_tas_name = "Update Test Name"

    # Initial load
    loader.load_from_file(file_path_1)
    record = reload_record()
    self.assertEqual(record.gwa_tas_name, file_1_gwa_tas_name)

    # 'update': set a field by hand, then load file 2. The name should
    # change while the hand-set value stays on the same record.
    record.beginning_period_of_availability = 2004
    record.save()
    loader.load_from_file(file_path_2)
    record = reload_record()
    self.assertEqual(record.gwa_tas_name, file_2_gwa_tas_name)
    self.assertEqual(record.beginning_period_of_availability, '2004')

    # 'delete': loading file 1 should leave a record without the hand-set value
    loader.collision_behavior = 'delete'
    loader.load_from_file(file_path_1)
    record = reload_record()
    self.assertEqual(record.beginning_period_of_availability, None)
    self.assertEqual(record.gwa_tas_name, file_1_gwa_tas_name)

    # 'skip' and 'skip_and_complain': loading file 2 must leave the name alone
    for behavior in ('skip', 'skip_and_complain'):
        loader.collision_behavior = behavior
        loader.load_from_file(file_path_2)
        record = reload_record()
        self.assertEqual(record.gwa_tas_name, file_1_gwa_tas_name)