def get_pvs():
    """Extract the process-variable data from the SAS_PROCESS_VARIABLE table.

    Author : Thomas Mahoney
    Date : 27 / 03 / 2018

    Returns : a list of (PROCVAR_NAME, PROCVAR_RULE) rows, ordered by
              PROCVAR_ORDER.
    Raises  : ConnectionError - if a database connection cannot be obtained.
    """
    engine = db.get_sql_connection()
    if engine is None:
        raise ConnectionError("Cannot get database connection")

    with engine.connect() as conn:
        sql = """SELECT PROCVAR_NAME,PROCVAR_RULE
                 FROM SAS_PROCESS_VARIABLE
                 ORDER BY PROCVAR_ORDER"""
        # Execute on the context-managed connection itself. The original
        # called conn.engine.execute(sql), which checked out a *second*
        # connection from the pool and left its cleanup to the GC, making
        # the `with` block pointless.
        result = conn.execute(sql)
        return result.fetchall()
def database_connection():
    """Fixture supplying the shared database connection.

    Injected into each test via its argument list so that a single
    connection object is reused across the whole test session instead of
    being re-created per test.
    """
    connection = db.get_sql_connection()
    return connection
def r_population_input(df_survey_input, df_tr_totals):
    """Create the population data that feeds into the R GES weighting.

    Author : David Powell / edits by Nassir Mohammad
    Date : 07/06/2018

    Parameters
    ----------
    df_survey_input : pandas.DataFrame
        Survey data for the processing month.
    df_tr_totals : pandas.DataFrame
        Population (traffic totals) information for the processing year.

    Returns
    -------
    pandas.DataFrame
        The pivoted population totals needed for GES weighting, as re-read
        from the POP_PROWVEC_TABLE database table.

    Side effects: replaces the contents of POP_PROWVEC_TABLE in the database.
    """
    # Sort and cleanse survey input: drop rows with a missing port group
    # or arrival/departure flag.
    sort1 = [SAMP_PORT_GRP_PV, ARRIVEDEPART]
    df_survey_input_sorted = df_survey_input.sort_values(sort1)
    df_survey_input_sorted = df_survey_input_sorted[
        ~df_survey_input_sorted[SAMP_PORT_GRP_PV].isnull()]
    df_survey_input_sorted = df_survey_input_sorted[
        ~df_survey_input_sorted[ARRIVEDEPART].isnull()]

    # Sort and cleanse the population totals the same way.
    df_pop_totals = df_tr_totals.sort_values(sort1)
    df_pop_totals = df_pop_totals[~df_pop_totals[SAMP_PORT_GRP_PV].isnull()]
    df_pop_totals = df_pop_totals[~df_pop_totals[ARRIVEDEPART].isnull()]

    # Keep only population rows whose port group appears in the survey data.
    # dict.fromkeys preserves first-seen order in O(n), replacing the
    # original O(n^2) side-effect list comprehension.
    unique = list(dict.fromkeys(df_survey_input_sorted[SAMP_PORT_GRP_PV].tolist()))
    df_pop_totals_match = df_pop_totals[
        df_pop_totals[SAMP_PORT_GRP_PV].isin(unique)]

    # Create traffic totals per (port group, direction).
    df_pop_totals_match = df_pop_totals_match.sort_values(
        [ARRIVEDEPART, SAMP_PORT_GRP_PV])
    df_traffic_totals = df_pop_totals_match.groupby(
        [SAMP_PORT_GRP_PV, ARRIVEDEPART]).agg({
            TRAFFIC_TOTAL_COLUMN: 'sum'
        }).reset_index()

    # Create lookup: one row per (port group, direction) with a 1-based
    # sequence number in T1.
    lookup_dataframe = df_survey_input_sorted.copy()
    lookup_dataframe["count"] = ""
    lookup_dataframe = lookup_dataframe.groupby(
        [SAMP_PORT_GRP_PV, ARRIVEDEPART]).agg({
            "count": 'count'
        }).reset_index()
    lookup_dataframe[T1] = range(1, len(lookup_dataframe) + 1)

    # Explicit copy so the astype assignment below does not write into a
    # view of lookup_dataframe (avoids SettingWithCopyWarning).
    lookup_dataframe_aux = lookup_dataframe[[
        SAMP_PORT_GRP_PV, ARRIVEDEPART, T1
    ]].copy()
    lookup_dataframe_aux[T1] = lookup_dataframe_aux[T1].astype(np.int64)

    # Attach sequence numbers to the traffic totals, then pivot so each
    # total becomes a T_<n> column on a single MODEL_GROUP row.
    df_mod_totals = pd.merge(df_traffic_totals,
                             lookup_dataframe_aux,
                             on=[SAMP_PORT_GRP_PV, ARRIVEDEPART],
                             how='left')
    df_mod_totals[MODEL_GROUP] = 1
    df_mod_totals = df_mod_totals.drop(
        columns=[ARRIVEDEPART, SAMP_PORT_GRP_PV])
    df_mod_pop_totals = df_mod_totals.pivot_table(index=MODEL_GROUP,
                                                  columns=T1,
                                                  values=TRAFFIC_TOTAL_COLUMN)
    df_mod_pop_totals = df_mod_pop_totals.add_prefix('T_')
    df_mod_pop_totals[MODEL_GROUP] = 1
    cols = [MODEL_GROUP
            ] + [col for col in df_mod_pop_totals if col != MODEL_GROUP]
    df_mod_pop_totals = df_mod_pop_totals[cols]
    df_mod_pop_totals = df_mod_pop_totals.reset_index(drop=True)

    con = db.get_sql_connection()

    # Recreate the prow-vector table. to_sql writes the index as a column,
    # so it is dropped again after re-reading from SQL.
    df_mod_pop_totals.to_sql(POP_PROWVEC_TABLE, con, if_exists='replace')

    df_mod_pop_totals_import = db.get_table_values(POP_PROWVEC_TABLE)
    df_mod_pop_totals_import = df_mod_pop_totals_import.drop('index', axis=1)

    return df_mod_pop_totals_import