def handle(self, *args, **options):
    """Load data from the broker database (or a PhonyCursor when running
    under --test), then optionally run post-load cleanup.

    Options used:
        test    -- when truthy, skip the real DB connection and use PhonyCursor
        noclean -- when truthy, skip the description-field update step
    """
    # Grab the data broker database connections
    if not options['test']:
        try:
            db_conn = connections['data_broker']
            db_cursor = db_conn.cursor()
        except Exception as err:
            logger.critical('Could not connect to database. Is DATA_BROKER_DATABASE_URL set?')
            # BUG FIX: the original called logger.critical(print(err)) —
            # print() returns None, so the log recorded "None" and the real
            # error went to stdout. Log the exception itself instead.
            logger.critical(err)
            return
    else:
        db_cursor = PhonyCursor()

    self.handle_loading(db_cursor=db_cursor, *args, **options)

    if not options['noclean']:
        # TODO: If this is slow, add ID limiting as below
        logger.info('Updating model description fields...')
        update_model_description_fields()

    # TODO: Find out where this is being used
    # logger.info('Updating awards to reflect their latest associated transaction info...')
    # update_awards(tuple(award_update_id_list))
    #
    # logger.info('Updating contract-specific awards to reflect their latest transaction info...')
    # update_contract_awards(tuple(award_contract_update_id_list))
    #
    # logger.info('Updating award category variables...')
    # update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
def test_description_fields(description_updatable_models):
    """Verify that update_model_description_fields() populates the *_desc
    columns from the daims_maps lookup tables, for both FPDS fields and the
    case-dependent TransactionNormalized.action_type description.
    """
    update_model_description_fields()

    # Every FPDS transaction's description columns must match the map entry
    # keyed by the corresponding code column.
    for item in TransactionFPDS.objects.all():
        assert item.cost_or_pricing_data_desc == daims_maps["cost_or_pricing_data_map"][item.cost_or_pricing_data]
        assert item.multiple_or_single_aw_desc ==\
            daims_maps["multiple_or_single_award_i_map"][item.multiple_or_single_award_i]
        assert item.cost_accounting_stand_desc ==\
            daims_maps["cost_accounting_standards_map"][item.cost_accounting_standards]
        assert item.fed_biz_opps_description == daims_maps["fed_biz_opps_map"][item.fed_biz_opps]

    # NOTE(review): these two variable names look swapped relative to their
    # filters — `contract_transactions` selects rows that HAVE assistance_data
    # and lack contract_data, and vice versa. The asserts below may only pass
    # because the case_1/case_2 map choice is swapped to match. Confirm the
    # intended pairing against daims_maps before renaming anything.
    contract_transactions = TransactionNormalized.objects.filter(assistance_data__isnull=False,
                                                                 contract_data__isnull=True)
    assistance_transactions = TransactionNormalized.objects.filter(assistance_data__isnull=True,
                                                                   contract_data__isnull=False)

    # "assistance" rows are checked against case_2_map...
    assert assistance_transactions.filter(action_type="A").first().action_type_description ==\
        daims_maps["TransactionNormalized.action_type_map"]["case_2_map"]["A"]
    assert assistance_transactions.filter(action_type="B").first().action_type_description ==\
        daims_maps["TransactionNormalized.action_type_map"]["case_2_map"]["B"]
    assert assistance_transactions.filter(action_type="C").first().action_type_description ==\
        daims_maps["TransactionNormalized.action_type_map"]["case_2_map"]["C"]
    assert assistance_transactions.filter(action_type="D").first().action_type_description ==\
        daims_maps["TransactionNormalized.action_type_map"]["case_2_map"]["D"]

    # ...and "contract" rows against case_1_map, for each action_type code.
    assert contract_transactions.filter(action_type="A").first().action_type_description ==\
        daims_maps["TransactionNormalized.action_type_map"]["case_1_map"]["A"]
    assert contract_transactions.filter(action_type="B").first().action_type_description ==\
        daims_maps["TransactionNormalized.action_type_map"]["case_1_map"]["B"]
    assert contract_transactions.filter(action_type="C").first().action_type_description ==\
        daims_maps["TransactionNormalized.action_type_map"]["case_1_map"]["C"]
    assert contract_transactions.filter(action_type="D").first().action_type_description ==\
        daims_maps["TransactionNormalized.action_type_map"]["case_1_map"]["D"]
def handle(self, *args, **options):
    """Clear the awards cache, load data from the broker database (or a
    PhonyCursor when running under --test), then optionally run post-load
    cleanup on model description fields.

    Options used:
        test    -- when truthy, skip the real DB connection and use PhonyCursor
        noclean -- when truthy, skip the description-field update step
    """
    awards_cache.clear()

    # Grab the data broker database connections
    if not options['test']:
        try:
            db_conn = connections['data_broker']
            db_cursor = db_conn.cursor()
        except Exception as err:
            logger.critical('Could not connect to database. Is DATA_BROKER_DATABASE_URL set?')
            # BUG FIX: the original called logger.critical(print(err)) —
            # print() returns None, so the log recorded "None" and the real
            # error went to stdout. Log the exception itself instead.
            logger.critical(err)
            return
    else:
        db_cursor = PhonyCursor()

    self.handle_loading(db_cursor=db_cursor, *args, **options)

    if not options['noclean']:
        # TODO: If this is slow, add ID limiting as below
        logger.info('Updating model description fields...')
        update_model_description_fields()

    # TODO: Find out where this is being used
    # logger.info('Updating awards to reflect their latest associated transaction info...')
    # update_awards(tuple(award_update_id_list))
    #
    # logger.info('Updating contract-specific awards to reflect their latest transaction info...')
    # update_contract_awards(tuple(award_contract_update_id_list))
    #
    # logger.info('Updating award category variables...')
    # update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
def handle(self, *args, **options):
    """Refresh model description fields.

    The award roll-up steps are intentionally skipped here because
    load_base already performs them; a notice is logged for each so
    operators know why nothing else runs.
    """
    console_logger = logging.getLogger('console')

    console_logger.info('Updating model description fields...')
    update_model_description_fields()

    # Log one SKIPPING notice per roll-up step handled elsewhere.
    skipped_steps = (
        'SKIPPING - Done in load_base - Updating awards to reflect their latest associated transaction info...',
        'SKIPPING - Done in load_base - Updating contract-specific awards to reflect their latest transaction info...',
        'SKIPPING - Done in load_base - Updating award category variables...',
    )
    for message in skipped_steps:
        console_logger.info(message)
def handle(self, *args, **options):
    """Refresh model description fields; award roll-ups are skipped
    because load_base already performs them (notices logged below)."""
    log = logging.getLogger('console')

    log.info('Updating model description fields...')
    update_model_description_fields()

    # Roll-up steps below are done in load_base; announce the skips.
    log.info('SKIPPING - Done in load_base - Updating awards to reflect their latest associated transaction info...')
    log.info('SKIPPING - Done in load_base - Updating contract-specific awards to reflect their latest transaction info...')
    log.info('SKIPPING - Done in load_base - Updating award category variables...')
def handle(self, *args, **options):
    """Management-command entry point: repopulate all derived model
    description fields. Positional args and options are accepted for the
    Command interface but unused."""
    update_model_description_fields()
def handle(self, *args, **options):
    """Load one broker submission end to end: files A, B, D1, D2, then C,
    followed by global post-load cleanup (description fields and award
    roll-ups).

    Options used:
        test          -- when truthy, use PhonyCursor instead of a real DB
        submission_id -- list whose first element is the broker submission id
    """
    awards_cache.clear()

    # Grab the data broker database connections
    if not options['test']:
        try:
            db_conn = connections['data_broker']
            db_cursor = db_conn.cursor()
        except Exception as err:
            logger.critical('Could not connect to database. Is DATA_BROKER_DATABASE_URL set?')
            # BUG FIX: the original called logger.critical(print(err)) —
            # print() returns None, so the log recorded "None" and the real
            # error went to stdout. Log the exception itself instead.
            logger.critical(err)
            return
    else:
        db_cursor = PhonyCursor()

    # Grab the submission id
    submission_id = options['submission_id'][0]

    # Verify the ID exists in the database, and grab the data
    db_cursor.execute('SELECT * FROM submission WHERE submission_id = %s', [submission_id])
    submission_data = dictfetchall(db_cursor)

    if len(submission_data) == 0:
        logger.error('Could not find submission with id ' + str(submission_id))
        return
    elif len(submission_data) > 1:
        logger.error('Found multiple submissions with id ' + str(submission_id))
        return

    # We have a single submission, which is what we want
    submission_data = submission_data[0]
    broker_submission_id = submission_data['submission_id']
    del submission_data['submission_id']  # To avoid collisions with the newer PK system
    submission_attributes = get_submission_attributes(broker_submission_id, submission_data)

    # Move on, and grab file A data
    db_cursor.execute('SELECT * FROM appropriation WHERE submission_id = %s', [submission_id])
    appropriation_data = dictfetchall(db_cursor)
    logger.info('Acquired appropriation data for ' + str(submission_id) + ', there are ' +
                str(len(appropriation_data)) + ' rows.')
    load_file_a(submission_attributes, appropriation_data, db_cursor)

    # Let's get File B information
    db_cursor.execute('SELECT * FROM object_class_program_activity WHERE submission_id = %s', [submission_id])
    prg_act_obj_cls_data = dictfetchall(db_cursor)
    logger.info('Acquired program activity object class data for ' + str(submission_id) + ', there are ' +
                str(len(prg_act_obj_cls_data)) + ' rows.')
    load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor)

    # File D2
    db_cursor.execute('SELECT * FROM award_financial_assistance WHERE submission_id = %s', [submission_id])
    award_financial_assistance_data = dictfetchall(db_cursor)
    logger.info('Acquired award financial assistance data for ' + str(submission_id) + ', there are ' +
                str(len(award_financial_assistance_data)) + ' rows.')
    load_file_d2(submission_attributes, award_financial_assistance_data, db_cursor)

    # File D1
    db_cursor.execute('SELECT * FROM award_procurement WHERE submission_id = %s', [submission_id])
    procurement_data = dictfetchall(db_cursor)
    logger.info('Acquired award procurement data for ' + str(submission_id) + ', there are ' +
                str(len(procurement_data)) + ' rows.')
    load_file_d1(submission_attributes, procurement_data, db_cursor)

    # Let's get File C information
    # Note: we load File C last, because the D1 and D2 files have the awarding
    # agency top tier (CGAC) and sub tier data needed to look up/create
    # the most specific possible corresponding award. When looking up/
    # creating awards for File C, we dont have sub-tier agency info, so
    # we'll do our best to match them to the more specific award records
    # already created by the D file load
    db_cursor.execute('SELECT * FROM award_financial WHERE submission_id = %s', [submission_id])
    award_financial_data = dictfetchall(db_cursor)
    logger.info('Acquired award financial data for ' + str(submission_id) + ', there are ' +
                str(len(award_financial_data)) + ' rows.')
    load_file_c(submission_attributes, award_financial_data, db_cursor)

    # Once all the files have been processed, run any global
    # cleanup/post-load tasks.

    # 1. Update the descriptions TODO: If this is slow, add ID limiting as above
    update_model_description_fields()
    # 2. Update awards to reflect their latest associated txn info
    update_awards(tuple(AWARD_UPDATE_ID_LIST))
    # 3. Update contract-specific award fields to reflect latest txn info
    update_contract_awards(tuple(AWARD_CONTRACT_UPDATE_ID_LIST))
def handle(self, *args, **options):
    """Bulk-load financial assistance transactions from a CSV file.

    Reads each row, builds Transaction and TransactionAssistance instances,
    bulk-inserts both lists, links each assistance row to its transaction,
    then runs the award/description update passes.

    Options used:
        file -- list whose first element is the CSV path to load
    """
    h.clear_caches()
    csv_file = options['file'][0]
    self.logger.info("Starting load for file {}".format(csv_file))

    # Create the csv reader
    reader = CsvDataReader(csv_file)

    # Create a new submission attributes object for this timestamp
    subattr = SubmissionAttributes()
    subattr.usaspending_update = datetime.now()
    subattr.save()

    # Create lists to hold model instances for bulk insert
    txn_list = []
    txn_assistance_list = []

    for idx, row in enumerate(reader):
        # NOTE(review): this progress check uses len(reader), not idx — it
        # only behaves as a "every 1000 rows" log if CsvDataReader.__len__
        # returns the running row count. If __len__ is the fixed total, this
        # either always or never fires; confirm against CsvDataReader.
        if len(reader) % 1000 == 0:
            self.logger.info("Read row {}".format(len(reader)))
        row = h.cleanse_values(row)

        awarding_agency = self.get_awarding_agency(row)  # todo: use agency dict?

        # Create the transaction object for this row
        txn_dict = {
            "submission": subattr,
            "action_date": h.convert_date(row['obligation_action_date']),
            "action_type": h.up2colon(row['action_type']),
            "award": self.get_or_create_award(row, awarding_agency=awarding_agency),
            "awarding_agency": awarding_agency,
            "description": row["project_description"],  # ?? account_title is anther contender?
            "data_source": "USA",
            "federal_action_obligation": row["fed_funding_amount"],
            "last_modified_date": h.convert_date(row['last_modified_date']),
            "modification_number": row["federal_award_mod"],  # ??
            "period_of_performance_start_date": h.convert_date(row['starting_date']),
            "period_of_performance_current_end_date": h.convert_date(row['ending_date']),
            "place_of_performance": h.get_or_create_location(
                row, location_mapper_fin_assistance_principal_place),
            "recipient": self.get_or_create_recipient(row),
            "type": h.up2colon(row['assistance_type']),
            "usaspending_unique_transaction_id": row["unique_transaction_id"],
            # ??"funding_agency_id":
            # ?? "certified date":
        }
        txn = Transaction(**txn_dict)
        # Derive the fiscal year from the action date after construction.
        txn.fiscal_year = fy(txn.action_date)
        txn_list.append(txn)

        # Create the transaction contract object for this row
        txn_assistance_dict = {
            "submission": subattr,
            "fain": row["federal_award_id"],
            "uri": row["uri"],
            "cfda": Cfda.objects.filter(
                program_number=row["cfda_program_num"]).first(),
            "correction_late_delete_indicator": h.up2colon(row['correction_late_ind']),
            "face_value_loan_guarantee": row["face_loan_guran"],
            "fiscal_year_and_quarter_correction": row["fyq_correction"],
            "non_federal_funding_amount": row["non_fed_funding_amount"],
            "original_loan_subsidy_cost": row["orig_sub_guran"],  # ??
            "record_type": int(h.up2colon(row['record_type'])),
            "sai_number": row["sai_number"],
            "submitted_type": "C",  # ?? For CSV?
        }
        # ?? business_funds_indicator
        # ?? reporting period start/end??
        txn_assistance = TransactionAssistance(**txn_assistance_dict)
        txn_assistance_list.append(txn_assistance)

    # Bulk insert transaction rows
    self.logger.info(
        "Starting Transaction bulk insert ({} records)".format(
            len(txn_list)))
    Transaction.objects.bulk_create(txn_list)
    self.logger.info("Completed Transaction bulk insert")

    # Update txn assistance list with newly-inserted transactions.
    # Relies on bulk_create preserving list order so index i in both
    # lists refers to the same source row.
    award_id_list = []  # we'll need this when updating the awards later on
    for idx, t in enumerate(txn_assistance_list):
        t.transaction = txn_list[idx]
        award_id_list.append(txn_list[idx].award_id)

    # Bulk insert transaction assistance rows
    self.logger.info(
        "Starting TransactionAssistance bulk insert ({} records)".format(
            len(txn_assistance_list)))
    TransactionAssistance.objects.bulk_create(txn_assistance_list)
    self.logger.info("Completed TransactionAssistance bulk insert")

    # Update awards to reflect latest transaction information
    # (note that this can't be done via signals or a save()
    # override in the model itself, because those aren't
    # triggered by a bulk update
    self.logger.info("Starting Awards update")
    count = update_awards(tuple(award_id_list))
    update_contract_awards(tuple(award_id_list))
    update_model_description_fields()
    self.logger.info("Completed Awards update ({} records)".format(count))
def handle(self, *args, **options):
    """Bulk-load contract transactions from a CSV file.

    Reads each row, builds Transaction and TransactionContract instances
    (mapping legacy CSV column names onto model fields), bulk-inserts both
    lists, links each contract row to its transaction, then runs the
    award/description update passes.

    Options used:
        file -- list whose first element is the CSV path to load
    """
    h.clear_caches()
    csv_file = options['file'][0]
    self.logger.info("Starting load for file {}".format(csv_file))

    # Create the csv reader
    reader = CsvDataReader(csv_file)

    # Create a new submission attributes object for this timestamp
    subattr = SubmissionAttributes()
    subattr.usaspending_update = datetime.now()
    subattr.save()

    # Create lists to hold model instances for bulk insert
    txn_list = []
    txn_contract_list = []

    subtier_agency_dict = h.get_subtier_agency_dict()

    for idx, row in enumerate(reader):
        # NOTE(review): progress check uses len(reader), not idx — only
        # correct if CsvDataReader.__len__ returns the running row count;
        # confirm against CsvDataReader.
        if len(reader) % 1000 == 0:
            self.logger.info("Read row {}".format(len(reader)))
        row = h.cleanse_values(row)

        awarding_agency_id = self.get_agency_id(row["contractingofficeagencyid"], subtier_agency_dict)

        # Create the transaction object for this row
        txn_dict = {
            "action_date": h.convert_date(row['signeddate']),
            "award": self.get_or_create_award(row, awarding_agency_id),
            "awarding_agency_id": awarding_agency_id,
            "data_source": "USA",
            "description": row["descriptionofcontractrequirement"],
            "federal_action_obligation": row["dollarsobligated"],
            "funding_agency_id": self.get_agency_id(row["fundingrequestingagencyid"], subtier_agency_dict),
            "last_modified_date": h.convert_date(row['last_modified_date']),
            "modification_number": row["modnumber"],
            "place_of_performance": h.get_or_create_location(
                row, mapper=location_mapper_place_of_performance),
            "period_of_performance_current_end_date": h.convert_date(row['currentcompletiondate']),
            "period_of_performance_start_date": h.convert_date(row['effectivedate']),
            "recipient": self.get_or_create_recipient(row),
            "submission": subattr,
            "type": evaluate_contract_award_type(row),
            "action_type": h.up2colon(row['reasonformodification']),
            "usaspending_unique_transaction_id": row["unique_transaction_id"]
        }
        txn = Transaction(**txn_dict)
        txn_list.append(txn)

        # Create the transaction contract object for this row
        txn_contract_dict = {
            "submission": subattr,
            "piid": row['piid'],
            "parent_award_id": row['idvpiid'],
            "current_total_value_award": h.parse_numeric_value(row["baseandexercisedoptionsvalue"]),
            "period_of_performance_potential_end_date": h.convert_date(row['ultimatecompletiondate']),
            "potential_total_value_of_award": h.parse_numeric_value(row["baseandalloptionsvalue"]),
            "epa_designated_product": self.parse_first_character(row['useofepadesignatedproducts']),
            "gfe_gfp": h.up2colon(row['gfe_gfp']),
            "cost_or_pricing_data": h.up2colon(row['costorpricingdata']),
            "type_of_contract_pricing": h.up2colon(row['typeofcontractpricing']),
            "multiple_or_single_award_idv": h.up2colon(row['multipleorsingleawardidc']),
            # NOTE(review): "naics" is sourced from nationalinterestactioncode,
            # the same column used for "national_interest_action" below — this
            # looks like a copy/paste slip (expected a naicscode-style column);
            # verify against the CSV layout before relying on naics values.
            "naics": h.up2colon(row['nationalinterestactioncode']),
            "dod_claimant_program_code": h.up2colon(row['claimantprogramcode']),
            "commercial_item_acquisition_procedures": h.up2colon(
                row['commercialitemacquisitionprocedures']),
            "commercial_item_test_program": h.up2colon(row['commercialitemtestprogram']),
            "consolidated_contract": h.up2colon(row['consolidatedcontract']),
            "contingency_humanitarian_or_peacekeeping_operation": h.up2colon(
                row['contingencyhumanitarianpeacekeepingoperation']),
            "contract_bundling": h.up2colon(row['contractbundling']),
            "contract_financing": h.up2colon(row['contractfinancing']),
            "contracting_officers_determination_of_business_size": h.up2colon(
                row['contractingofficerbusinesssizedetermination']),
            "country_of_product_or_service_origin": h.up2colon(row['countryoforigin']),
            "davis_bacon_act": h.up2colon(row['davisbaconact']),
            "evaluated_preference": h.up2colon(row['evaluatedpreference']),
            "extent_competed": h.up2colon(row['extentcompeted']),
            "information_technology_commercial_item_category": h.up2colon(
                row['informationtechnologycommercialitemcategory']),
            "interagency_contracting_authority": h.up2colon(row['interagencycontractingauthority']),
            "local_area_set_aside": h.up2colon(row['localareasetaside']),
            "purchase_card_as_payment_method": h.up2colon(row['purchasecardaspaymentmethod']),
            "multi_year_contract": h.up2colon(row['multiyearcontract']),
            "national_interest_action": h.up2colon(row['nationalinterestactioncode']),
            "number_of_actions": h.up2colon(row['numberofactions']),
            "number_of_offers_received": h.up2colon(row['numberofoffersreceived']),
            "performance_based_service_acquisition": h.up2colon(row['performancebasedservicecontract']),
            "place_of_manufacture": h.up2colon(row['placeofmanufacture']),
            "product_or_service_code": h.up2colon(row['productorservicecode']),
            "recovered_materials_sustainability": h.up2colon(row['recoveredmaterialclauses']),
            "research": h.up2colon(row['research']),
            "sea_transportation": h.up2colon(row['seatransportation']),
            "service_contract_act": h.up2colon(row['servicecontractact']),
            "small_business_competitiveness_demonstration_program": self.parse_first_character(
                row['smallbusinesscompetitivenessdemonstrationprogram']),
            "solicitation_procedures": h.up2colon(row['solicitationprocedures']),
            "subcontracting_plan": h.up2colon(row['subcontractplan']),
            "type_set_aside": h.up2colon(row['typeofsetaside']),
            "walsh_healey_act": h.up2colon(row['walshhealyact']),
            "rec_flag": self.parse_first_character(h.up2colon(row['rec_flag'])),
            "type_of_idc": self.parse_first_character(row['typeofidc']),
            "a76_fair_act_action": self.parse_first_character(row['a76action']),
            "clinger_cohen_act_planning": self.parse_first_character(row['clingercohenact']),
            "cost_accounting_standards": self.parse_first_character(
                row['costaccountingstandardsclause']),
            "fed_biz_opps": self.parse_first_character(row['fedbizopps']),
            "foreign_funding": self.parse_first_character(row['fundedbyforeignentity']),
            "major_program": self.parse_first_character(row['majorprogramcode']),
            "program_acronym": self.parse_first_character(row['programacronym']),
            "referenced_idv_modification_number": self.parse_first_character(
                row['idvmodificationnumber']),
            "transaction_number": self.parse_first_character(row['transactionnumber']),
            "solicitation_identifier": self.parse_first_character(row['solicitationid'])
        }
        txn_contract = TransactionContract(**txn_contract_dict)
        txn_contract_list.append(txn_contract)

    # Bulk insert transaction rows
    self.logger.info("Starting Transaction bulk insert ({} records)".format(len(txn_list)))
    Transaction.objects.bulk_create(txn_list)
    self.logger.info("Completed Transaction bulk insert")

    # Update txn contract list with newly-inserted transactions.
    # Relies on bulk_create preserving list order so index i in both
    # lists refers to the same source row.
    award_id_list = []  # we'll need this when updating the awards later on
    for idx, t in enumerate(txn_contract_list):
        # add transaction info to this TransactionContract object
        t.transaction = txn_list[idx]
        # add the corresponding award id to a list we'll use when batch-updating award data
        award_id_list.append(txn_list[idx].award_id)

    # Bulk insert transaction contract rows
    self.logger.info("Starting TransactionContract bulk insert ({} records)".format(len(txn_contract_list)))
    TransactionContract.objects.bulk_create(txn_contract_list)
    self.logger.info("Completed TransactionContract bulk insert")

    # Update awards to reflect latest transaction information
    # (note that this can't be done via signals or a save()
    # override in the model itself, because those aren't
    # triggered by a bulk update
    self.logger.info("Starting Awards update")
    count = update_awards(tuple(award_id_list))
    update_contract_awards(tuple(award_id_list))
    update_model_description_fields()
    self.logger.info("Completed Awards update ({} records)".format(count))
def update_descriptions(apps, schema_editor):
    """Data-migration hook: repopulate derived model description fields.

    ``apps`` and ``schema_editor`` are supplied by Django's migration
    framework (RunPython) and are not used here.
    """
    update_model_description_fields()