def update_tas_lookups(sess, csv_path, update_missing=None, metrics=None):
    """ Load TAS data from the provided CSV and replace/insert any TASLookups

        Args:
            sess: connection to database
            csv_path: path of the car_tas csv to import
            update_missing: if provided, this list of account numbers will only update matching records
                if the budget_function_code is null/none
            metrics: an object containing information for the metrics file
    """
    if not metrics:
        metrics = {'records_updated': 0, 'records_added': 0}

    data = clean_tas(csv_path, metrics)
    add_existing_id(data)

    old_data = data[data['existing_id'].notnull()]
    del old_data['existing_id']

    new_data = data[data['existing_id'].isnull()]
    del new_data['existing_id']

    if update_missing:
        # find which incoming records can fill in the empty records
        relevant_old_data = old_data[old_data['account_num'].isin(update_missing)]

        # Fill them in. If budget_function_code is empty, the following columns are also empty.
        fill_in_cols = ['account_title', 'budget_bureau_code', 'budget_bureau_name', 'budget_function_code',
                        'budget_function_title', 'budget_subfunction_code', 'budget_subfunction_title',
                        'reporting_agency_aid', 'reporting_agency_name']
        for _, row in relevant_old_data.iterrows():
            fill_in_updates = {fill_in_col: row[fill_in_col] for fill_in_col in fill_in_cols}
            fill_in_updates['updated_at'] = datetime.now(timezone.utc)
            sess.query(TASLookup).filter_by(account_num=row['account_num']).\
                update(synchronize_session=False, values=fill_in_updates)
        logger.info('%s records filled in', len(relevant_old_data.index))
        metrics['records_updated'] += len(relevant_old_data.index)
    else:
        # instead of using the pandas to_sql dataframe method like some of the other domain load processes, iterate
        # through the dataframe rows so we can load using the orm model (note: toyed with the SQLAlchemy bulk load
        # options but ultimately decided not to go outside the unit of work for the sake of a performance gain)
        for _, row in old_data.iterrows():
            sess.query(TASLookup).filter_by(account_num=row['account_num']).\
                update(row, synchronize_session=False)
        for _, row in new_data.iterrows():
            sess.add(TASLookup(**row))
        logger.info('%s records in CSV, %s existing', len(data.index), sum(data['existing_id'].notnull()))
        metrics['records_updated'] += len(old_data.index)
        metrics['records_added'] += len(new_data.index)
    sess.commit()
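
# Usage sketch (not from the original module): one plausible way to drive the
# variant above. The engine DSN, CSV path, and account numbers are assumptions
# for illustration only; clean_tas, add_existing_id, and TASLookup come from
# the surrounding module.
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine('postgresql://localhost/data_broker')  # assumed DSN
sess = sessionmaker(bind=engine)()

metrics = {'records_updated': 0, 'records_added': 0}

# Full refresh: update every existing record, insert the rest
update_tas_lookups(sess, 'cars_tas.csv', metrics=metrics)

# Backfill pass: touch only the listed account numbers, per the
# update_missing contract in the docstring (account numbers are made up)
update_tas_lookups(sess, 'cars_tas.csv', update_missing=[1234, 5678], metrics=metrics)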
def updateTASLookups(csvPath):
    """Load TAS data from the provided CSV and replace/insert any TASLookups"""
    sess = GlobalDB.db().session

    data = cleanTas(csvPath)
    add_start_date(data)
    add_existing_id(data)

    # Mark all TAS we don't see as "ended"
    existing_ids = [int(i) for i in data['existing_id'] if pd.notnull(i)]
    sess.query(TASLookup).\
        filter(TASLookup.internal_end_date.is_(None)).\
        filter(~TASLookup.tas_id.in_(existing_ids)).\
        update({'internal_end_date': date.today()}, synchronize_session=False)

    new_data = data[data['existing_id'].isnull()]
    del new_data['existing_id']

    # instead of using the pandas to_sql dataframe method like some of the
    # other domain load processes, iterate through the dataframe rows so we
    # can load using the orm model (note: toyed with the SQLAlchemy bulk load
    # options but ultimately decided not to go outside the unit of work for
    # the sake of a performance gain)
    for _, row in new_data.iterrows():
        sess.add(TASLookup(**row))
    sess.commit()

    logger.info('%s records in CSV, %s existing',
                len(data.index), sum(data['existing_id'].notnull()))
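
# Edge-case note (illustrative, not in the original): on a first-time load
# every existing_id is null, so existing_ids is empty and the in_() clause
# degenerates; some SQLAlchemy versions emit a warning for empty IN lists.
# A guarded drop-in for the end-dating block inside updateTASLookups above:
if existing_ids:
    sess.query(TASLookup).\
        filter(TASLookup.internal_end_date.is_(None)).\
        filter(~TASLookup.tas_id.in_(existing_ids)).\
        update({'internal_end_date': date.today()}, synchronize_session=False)
else:
    # Nothing in the CSV matched: every still-open TAS has fallen off the
    # CARS file, so end-date all of them
    sess.query(TASLookup).\
        filter(TASLookup.internal_end_date.is_(None)).\
        update({'internal_end_date': date.today()}, synchronize_session=False)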
def update_tas_lookups(csv_path):
    """Load TAS data from the provided CSV and replace/insert any TASLookups"""
    sess = GlobalDB.db().session

    data = clean_tas(csv_path)
    add_existing_id(data)

    old_data = data[data['existing_id'].notnull()]
    del old_data['existing_id']
    new_data = data[data['existing_id'].isnull()]
    del new_data['existing_id']

    # instead of using the pandas to_sql dataframe method like some of the
    # other domain load processes, iterate through the dataframe rows so we
    # can load using the orm model (note: toyed with the SQLAlchemy bulk load
    # options but ultimately decided not to go outside the unit of work for
    # the sake of a performance gain)
    for _, row in old_data.iterrows():
        sess.query(TASLookup).filter_by(account_num=row['account_num']).\
            update(row, synchronize_session=False)
    for _, row in new_data.iterrows():
        sess.add(TASLookup(**row))
    sess.commit()

    logger.info('%s records in CSV, %s existing',
                len(data.index), sum(data['existing_id'].notnull()))
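
# The loaders above all lean on add_existing_id(data) to split incoming rows
# into updates vs. inserts, but the helper itself is not shown in this
# section. The sketch below is one plausible shape, inferred from the
# filter_by(account_num=...) calls and the existing_id column used above;
# the join key and column names are assumptions, not the real implementation.
def add_existing_id(data):
    """Attach an 'existing_id' column: the TASLookup primary key for rows
    already in the database, NaN for rows that are new (sketch only)."""
    sess = GlobalDB.db().session
    # Map each known account_num to its primary key in one query
    existing = dict(sess.query(TASLookup.account_num, TASLookup.tas_id))
    # Series.map leaves NaN where the account_num has no database match
    data['existing_id'] = data['account_num'].map(existing)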
def addTAS(self, ata, aid, bpoa, epoa, availability, main, sub):
    """ Add a TAS to the validation database if it does not exist.
    This method can be slow.

    Args:
        ata -- allocation transfer agency
        aid -- agency identifier
        bpoa -- beginning period of availability
        epoa -- ending period of availability
        availability -- availability type code
        main -- main account code
        sub -- sub account code
    """
    queryResult = self.session.query(TASLookup).\
        filter(TASLookup.allocation_transfer_agency == ata).\
        filter(TASLookup.agency_identifier == aid).\
        filter(TASLookup.beginning_period_of_availability == bpoa).\
        filter(TASLookup.ending_period_of_availability == epoa).\
        filter(TASLookup.availability_type_code == availability).\
        filter(TASLookup.main_account_code == main).\
        filter(TASLookup.sub_account_code == sub).all()
    if len(queryResult) == 0:
        tas = TASLookup()
        tas.allocation_transfer_agency = ata
        tas.agency_identifier = aid
        tas.beginning_period_of_availability = bpoa
        tas.ending_period_of_availability = epoa
        tas.availability_type_code = availability
        tas.main_account_code = main
        tas.sub_account_code = sub
        self.session.add(tas)
        self.session.commit()
        return True
    return False
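
# Usage sketch for addTAS (illustrative): the method lives on some
# validation-database interface class exposing a self.session attribute.
# 'ValidatorDatabase' and the TAS component values below are assumed names,
# not taken from the original code.
db = ValidatorDatabase()

# First call inserts the TAS, commits, and returns True
created = db.addTAS('000', '012', '2016', '2017', None, '0100', '000')
# An identical second call finds the existing row and returns False
duplicate = db.addTAS('000', '012', '2016', '2017', None, '0100', '000')
assert created and not duplicate
# Note the per-insert query and commit: one round trip per new TAS is why
# the docstring warns this method can be slow for bulk loads.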
def updateTASLookups(csvPath):
    """Load TAS data from the provided CSV and replace/insert any TASLookups"""
    sess = GlobalDB.db().session

    data = cleanTas(csvPath)

    # Delete all existing TAS records -- we don't want to accept submissions
    # after the entries fall off the CARS file
    sess.query(TASLookup).delete(synchronize_session=False)

    # instead of using the pandas to_sql dataframe method like some of the
    # other domain load processes, iterate through the dataframe rows so we
    # can load using the orm model (note: toyed with the SQLAlchemy bulk load
    # options but ultimately decided not to go outside the unit of work for
    # the sake of a performance gain)
    for _, row in data.iterrows():
        sess.add(TASLookup(**row))
    sess.commit()

    logger.info('%s records inserted to %s',
                len(data.index), TASLookup.__tablename__)