示例#1
0
def get_pvs():
    """
    Author       : Thomas Mahoney
    Date         : 27 / 03 / 2018
    Purpose      : Extracts the PV data from the process_variables table.
    Parameters   : conn - a connection object linking  the database.
    Returns      : a collection of pv names and statements
    Requirements : NA
    Dependencies : NA
    """

    engine = db.get_sql_connection()

    if engine is None:
        raise ConnectionError("Cannot get database connection")

    with engine.connect() as conn:

        sql = """SELECT 
                    PROCVAR_NAME,PROCVAR_RULE
                 FROM 
                    SAS_PROCESS_VARIABLE
                 ORDER BY 
                    PROCVAR_ORDER"""

        v = conn.engine.execute(sql)
        return v.fetchall()
def database_connection():
    """
    This fixture provides the database connection. It is added to the function argument of each test
    and picked up by the test from there. The fixture allows us to re-use the same database connection
    over and over again.
    """
    return db.get_sql_connection()
示例#3
0
def r_population_input(df_survey_input, df_tr_totals):
    """
    Author       : David Powell / edits by Nassir Mohammad
    Date         : 07/06/2018
    Purpose      : Creates population data that feeds into the R GES weighting
    Parameters   : df_survey_input - A data frame containing the survey data for
                   processing month
                   trtotals - A data frame containing population information for
                   processing year
    Returns      : A data frame containing the information needed for GES weighting
    Requirements : NA
    Dependencies : NA
    """

    # Sort input values
    sort1 = [SAMP_PORT_GRP_PV, ARRIVEDEPART]
    df_survey_input_sorted = df_survey_input.sort_values(sort1)

    # Cleanse data
    df_survey_input_sorted = df_survey_input_sorted[
        ~df_survey_input_sorted[SAMP_PORT_GRP_PV].isnull()]
    df_survey_input_sorted = df_survey_input_sorted[
        ~df_survey_input_sorted[ARRIVEDEPART].isnull()]

    # Sort input values
    df_pop_totals = df_tr_totals.sort_values(sort1)

    # Cleanse data
    df_pop_totals = df_pop_totals[~df_pop_totals[SAMP_PORT_GRP_PV].isnull()]
    df_pop_totals = df_pop_totals[~df_pop_totals[ARRIVEDEPART].isnull()]

    # Create unique list of items from survey input
    items = df_survey_input_sorted[SAMP_PORT_GRP_PV].tolist()
    unique = []
    [unique.append(x) for x in items if x not in unique]

    df_pop_totals_match = df_pop_totals[df_pop_totals[SAMP_PORT_GRP_PV].isin(
        unique)]

    # Create traffic totals
    df_pop_totals_match = df_pop_totals_match.sort_values(
        [ARRIVEDEPART, SAMP_PORT_GRP_PV])
    df_traffic_totals = df_pop_totals_match.groupby(
        [SAMP_PORT_GRP_PV, ARRIVEDEPART]).agg({
            TRAFFIC_TOTAL_COLUMN: 'sum'
        }).reset_index()

    # Create lookup. Group by and aggregate
    lookup_dataframe = df_survey_input_sorted.copy()
    lookup_dataframe["count"] = ""
    lookup_dataframe = lookup_dataframe.groupby(
        [SAMP_PORT_GRP_PV, ARRIVEDEPART]).agg({
            "count": 'count'
        }).reset_index()

    # Cleanse data
    # lookup_dataframe.drop(["count"], axis=1)
    lookup_dataframe[T1] = range(len(lookup_dataframe))
    lookup_dataframe[T1] = lookup_dataframe[T1] + 1

    # Create population totals for current survey data - Cleanse data and merge
    lookup_dataframe_aux = lookup_dataframe[[
        SAMP_PORT_GRP_PV, ARRIVEDEPART, T1
    ]]
    lookup_dataframe_aux[T1] = lookup_dataframe_aux.T1.astype(np.int64)

    df_mod_totals = pd.merge(df_traffic_totals,
                             lookup_dataframe_aux,
                             on=[SAMP_PORT_GRP_PV, ARRIVEDEPART],
                             how='left')

    df_mod_totals[MODEL_GROUP] = 1
    df_mod_totals = df_mod_totals.drop(
        columns=[ARRIVEDEPART, SAMP_PORT_GRP_PV])
    df_mod_pop_totals = df_mod_totals.pivot_table(index=MODEL_GROUP,
                                                  columns=T1,
                                                  values=TRAFFIC_TOTAL_COLUMN)
    df_mod_pop_totals = df_mod_pop_totals.add_prefix('T_')

    df_mod_pop_totals[MODEL_GROUP] = 1
    cols = [MODEL_GROUP
            ] + [col for col in df_mod_pop_totals if col != MODEL_GROUP]
    df_mod_pop_totals = df_mod_pop_totals[cols]

    df_mod_pop_totals = df_mod_pop_totals.reset_index(drop=True)

    con = db.get_sql_connection()
    # recreate proc_vec table

    # note the index gets added so needs to be removed when re-read from SQL
    df_mod_pop_totals.to_sql(POP_PROWVEC_TABLE, con, if_exists='replace')

    df_mod_pop_totals_import = db.get_table_values(POP_PROWVEC_TABLE)
    df_mod_pop_totals_import = df_mod_pop_totals_import.drop('index', axis=1)

    return df_mod_pop_totals_import