def teardown_module(module):
    # Delete any previous records from the Survey_Subsample tables for the given run ID
    ctf.reset_test_tables(RUN_ID, STEP_CONFIGURATION[STEP_NAME])

    # Cleanses Survey Subsample table.
    cf.delete_from_table(idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', '=', RUN_ID)

    print("Teardown")
def teardown_module(module):
    """ Teardown any state that was previously setup with a setup_module method. """
    # Deletes data from temporary tables as necessary.
    ctf.reset_test_tables(RUN_ID, STEP_CONFIGURATION[STEP_NAME])

    # Cleanses Survey Subsample table.
    cf.delete_from_table(idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', '=', RUN_ID)

    print("Duration: {}".format(time.strftime("%H:%M:%S", time.gmtime(time.time() - START_TIME))))
Exemplo n.º 3
0
def reset_tables():
    """
    Author        : Thomas Mahoney
    Date          : 7 Sep 2018
    Purpose       : Deletes records from tables associated with the dataimport test.
    Parameters    : NA
    Returns       : NA
    """
    """ Deletes records from tables associated with the dataimport test. """

    print(
        "Deleting records from tables associated with the dataimport test...")

    tables_to_delete_run_id = [
        idm.SURVEY_SUBSAMPLE_TABLE, "TRAFFIC_DATA", "SHIFT_DATA",
        "NON_RESPONSE_DATA", "UNSAMPLED_OOH_DATA"
    ]

    for table in tables_to_delete_run_id:
        cf.delete_from_table(table, 'RUN_ID', '=', RUN_ID)
        cf.delete_from_table(table, 'RUN_ID', '=', RUN_ID + "_OCTOBER_2017")
        cf.delete_from_table(table, 'RUN_ID', '=', RUN_ID + "_NOVEMBER_2017")
        cf.delete_from_table(table, 'RUN_ID', '=', RUN_ID + "_DECEMBER_2017")
        cf.delete_from_table(table, 'RUN_ID', '=', RUN_ID + "_Q3_2017")

    tables_to_delete_all = [
        'SAS_SURVEY_SUBSAMPLE',
        'SAS_SHIFT_DATA',
        'SAS_NON_RESPONSE_DATA',
        'SAS_TRAFFIC_DATA',
        'SAS_UNSAMPLED_OOH_DATA',
    ]

    for table in tables_to_delete_all:
        cf.delete_from_table(table)

    print("Import table test records deleted.")
def test_non_response_weight_step(path_to_data):

    # Get database connection
    conn = database_connection()

    # Run step 1
    idm.populate_survey_data_for_step(RUN_ID, conn, step_config)

    # ###########################
    # run checks 1
    # ###########################

    # Check all deleted tables are empty
    for table in step_config['delete_tables']:
        delete_result = cf.get_table_values(table)
        assert delete_result.empty

    # Check all nullified columns are NULL
    for column in step_config['nullify_pvs']:
        column_name = column.replace('[', '').replace(']', '')
        result = cf.select_data(column_name, idm.SURVEY_SUBSAMPLE_TABLE,
                                'RUN_ID', RUN_ID)
        assert result[column_name].isnull().sum() == len(result)

    # Check table has been populated
    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == EXPECTED_LEN

    # Run step 2
    idm.populate_step_data(RUN_ID, conn, step_config)

    # ###########################
    # run checks 2
    # ###########################

    # Check table has been populated
    table_len = len(cf.get_table_values(step_config["data_table"]))
    assert table_len == NON_RESPONSE_DATA_LENGTH

    # Run step 3
    idm.copy_step_pvs_for_survey_data(RUN_ID, conn, step_config)

    # ###########################
    # run checks 3
    # ###########################

    # Get all values from the sas_process_variables table
    results = cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE)

    # Check number of PV records moved matches number passed in through step configuration.
    assert len(results) == len(step_config['pv_columns'])

    # Get the spv_table values and ensure all records have been deleted
    results = cf.get_table_values(step_config['spv_table'])
    assert len(results) == 0

    # ###########################
    # run checks 3
    # ###########################

    # Run step 4  : Apply Non Response Wt PVs On Survey Data
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_NON_RESPONSE_SPV',
                              in_id='serial')

    # ###########################
    # run checks 4
    # ###########################

    table_len = len(cf.get_table_values(step_config["spv_table"]))
    assert table_len == EXPECTED_LEN

    # Run step 5 : Update Survey Data with Non Response Wt PVs Output
    idm.update_survey_data_with_step_pv_output(conn, step_config)

    # ###########################
    # run checks 5
    # ###########################

    # Check all columns in SAS_SURVEY_SUBSAMPLE have been altered
    result = cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    for column in step_config['pv_columns']:
        column_name = column.replace("'", "")
        assert len(result[column_name]) == EXPECTED_LEN
        assert result[column_name].sum() != 0

    # Assert SAS_PROCESS_VARIABLES_TABLE has been cleansed
    table_len = len(cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE))
    assert table_len == 0

    # Assert spv_table has been cleansed
    table_len = len(cf.get_table_values(step_config["spv_table"]))
    assert table_len == 0

    # Run step 6 : Copy Non Response Wt PVs for Non Response Data
    idm.copy_step_pvs_for_step_data(RUN_ID, conn, step_config)

    # ###########################
    # run checks 6
    # ###########################

    # Assert pv_table has been cleansed
    table_len = len(cf.get_table_values(step_config["pv_table"]))
    assert table_len == 0

    # Assert SAS_PROCESS_VARIABLES_TABLE was populated
    table_len = len(cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE))
    assert table_len == NON_RESPONSE_SAS_PROCESS_VARIABLE_TABLE_LENGTH

    # Run step 7 : Apply Non Response Wt PVs On Non Response Data
    process_variables.process(dataset='non_response',
                              in_table_name='SAS_NON_RESPONSE_DATA',
                              out_table_name='SAS_NON_RESPONSE_PV',
                              in_id='REC_ID')

    # ###########################
    # run checks 7
    # ###########################

    table_len = len(cf.get_table_values(step_config["pv_table"]))
    assert table_len == NON_RESPONSE_DATA_LENGTH

    # Run step 8 : Update NonResponse Data With PVs Output
    idm.update_step_data_with_step_pv_output(conn, step_config)

    # ###########################
    # run checks 8
    # ###########################

    # Assert data table was populated
    table_len = len(cf.get_table_values(step_config["data_table"]))
    assert table_len == NON_RESPONSE_DATA_LENGTH

    # Assert the following tables were cleansed
    deleted_tables = [
        step_config["pv_table"], step_config["temp_table"],
        idm.SAS_PROCESS_VARIABLES_TABLE, step_config["sas_ps_table"]
    ]

    for table in deleted_tables:
        table_len = len(cf.get_table_values(table))
        assert table_len == 0

    # ##############################
    # Calculate Non Response Weight
    # ##############################

    # dataimport the data from SQL and sort
    df_surveydata_import_actual = cf.get_table_values(
        idm.SAS_SURVEY_SUBSAMPLE_TABLE)

    df_surveydata_import_actual_sql = df_surveydata_import_actual.sort_values(
        by='SERIAL')
    df_surveydata_import_actual_sql.index = range(
        0, len(df_surveydata_import_actual_sql))

    df_nr_data_import_actual = cf.get_table_values(
        SAS_NON_RESPONSE_DATA_TABLE_NAME)

    # fix formatting in actual data
    df_surveydata_import_actual_sql.drop(['EXPENDCODE'], axis=1, inplace=True)
    df_surveydata_import_actual_sql['SHIFT_PORT_GRP_PV'] = \
        df_surveydata_import_actual_sql['SHIFT_PORT_GRP_PV'].apply(pd.to_numeric, errors='coerce')

    # do the calculation step
    result_py_data = non_resp.do_ips_nrweight_calculation(
        df_surveydata_import_actual_sql, df_nr_data_import_actual,
        'NON_RESPONSE_WT', 'SERIAL')

    # ###########################
    # run checks
    # ###########################

    # Retrieve and sort python calculated dataframes
    py_survey_data = result_py_data[0]
    py_survey_data = py_survey_data.sort_values(by='SERIAL')
    py_survey_data.index = range(0, len(py_survey_data))

    py_summary_data = result_py_data[1]
    py_summary_data.sort_values(by=NR_COLUMNS)
    py_summary_data[NR_COLUMNS] = py_summary_data[NR_COLUMNS].apply(
        pd.to_numeric, errors='coerce', downcast='float')
    py_summary_data.index = range(0, len(py_summary_data))

    # insert the csv output data into SQL and read back, this is for testing against data pulled from SQL Server
    test_result_survey = pd.read_csv(path_to_data + '/outputdata_final.csv',
                                     engine='python')
    cf.delete_from_table(OUT_TABLE_NAME)
    test_result_survey_sql = convert_dataframe_to_sql_format(
        OUT_TABLE_NAME, test_result_survey)
    test_result_survey_sql = test_result_survey_sql.sort_values(by='SERIAL')
    test_result_survey_sql.index = range(0, len(test_result_survey_sql))

    test_result_summary = pd.read_csv(path_to_data + '/summarydata_final.csv',
                                      engine='python')
    cf.delete_from_table(SUMMARY_OUT_TABLE_NAME)
    test_result_summary_sql = convert_dataframe_to_sql_format(
        SUMMARY_OUT_TABLE_NAME, test_result_summary)
    test_result_summary_sql = test_result_summary_sql.sort_values(
        by=NR_COLUMNS)
    test_result_summary_sql[NR_COLUMNS] = test_result_summary_sql[
        NR_COLUMNS].apply(pd.to_numeric, errors='coerce', downcast='float')
    test_result_summary_sql.index = range(0, len(test_result_summary_sql))

    # Assert dfs are equal
    assert_frame_equal(py_survey_data,
                       test_result_survey_sql,
                       check_dtype=False,
                       check_like=True,
                       check_less_precise=True)

    assert_frame_equal(py_summary_data,
                       test_result_summary_sql,
                       check_dtype=False,
                       check_like=True,
                       check_less_precise=True)

    # put the actual SQL data back in for the remaining steps
    cf.delete_from_table(OUT_TABLE_NAME)
    cf.delete_from_table(SUMMARY_OUT_TABLE_NAME)
    cf.insert_dataframe_into_table(OUT_TABLE_NAME, py_survey_data)
    cf.insert_dataframe_into_table(SUMMARY_OUT_TABLE_NAME, py_summary_data)

    # Update Survey Data With Non Response Wt Results
    idm.update_survey_data_with_step_results(conn, step_config)

    # ###########################
    # run checks 9
    # ###########################

    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == EXPECTED_LEN

    table_len = len(cf.get_table_values(step_config["temp_table"]))
    assert table_len == 0

    # Store Survey Data With NonResponse Wt Results
    idm.store_survey_data_with_step_results(RUN_ID, conn, step_config)

    # ###########################
    # run checks 10
    # ###########################

    # Assert SURVEY_SUBSAMPLE_TABLE was populated
    result = cf.select_data('*', idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', RUN_ID)
    table_len = result.shape[0]
    assert table_len == SURVEY_SUBSAMPLE_LENGTH

    # Assert all records for corresponding run_id were deleted from ps_table.
    result = cf.select_data('*', step_config["ps_table"], 'RUN_ID', RUN_ID)

    # Indicating no dataframe was pulled from SQL.
    if not result:
        assert True

    # Assert SAS_SURVEY_SUBSAMPLE_TABLE was cleansed
    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == 0

    # Store Non Response Wt Summary
    idm.store_step_summary(RUN_ID, conn, step_config)

    # ###########################
    # run checks 11
    # ###########################

    # Assert summary was populated.
    result = cf.select_data('*', step_config["ps_table"], 'RUN_ID', RUN_ID)
    table_len = result.shape[0]
    assert table_len == 207

    # Assert temp table was cleansed
    table_len = len(cf.get_table_values(step_config["sas_ps_table"]))
    assert table_len == 0
def test_unsampled_weight_step():
    # Get database connection
    conn = database_connection()

    # Run step 1 / 8
    idm.populate_survey_data_for_step(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Check all deleted tables are empty
    for table in STEP_CONFIGURATION[STEP_NAME]['delete_tables']:
        delete_result = cf.get_table_values(table)
        assert delete_result.empty

    # Check all nullified columns are NULL
    for column in STEP_CONFIGURATION[STEP_NAME]['nullify_pvs']:
        column_name = column.replace('[', '').replace(']', '')
        result = cf.select_data(column_name, idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', RUN_ID)
        assert result[column_name].isnull().sum() == len(result)

    # Check table has been populated
    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == EXPECTED_LEN

    # Run step 2 / 8
    idm.populate_step_data(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Check table has been populated
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["data_table"]))
    assert table_len == 1252

    # Run step 3 / 8
    idm.copy_step_pvs_for_survey_data(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Assert idm.SAS_PROCESS_VARIABLES_TABLE has been populated
    table_len = len(cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE))
    assert table_len == NUMBER_OF_PVS

    # Assert STEP_CONFIGURATION[STEP_NAME]["spv_table"] has been cleansed
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["spv_table"]))
    assert table_len == 0

    # Run step 4 / 8
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_UNSAMPLED_OOH_SPV',
                              in_id='serial')

    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["spv_table"]))
    assert table_len == EXPECTED_LEN

    # Run step 5 / 8
    idm.update_survey_data_with_step_pv_output(conn, STEP_CONFIGURATION[STEP_NAME])

    # Check all columns in SAS_SURVEY_SUBSAMPLE have been altered
    result = cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    for column in STEP_CONFIGURATION[STEP_NAME]['pv_columns']:
        column_name = column.replace("'", "")
        assert len(result[column_name]) == EXPECTED_LEN

    # Assert SAS_PROCESS_VARIABLES_TABLE has been cleansed
    table_len = len(cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE))
    assert table_len == 0

    # Assert spv_table has been cleansed
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["spv_table"]))
    assert table_len == 0

    # Run step 6 / 8
    idm.copy_step_pvs_for_step_data(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Assert pv_table has been cleansed
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["pv_table"]))
    assert table_len == 0

    # Assert SAS_PROCESS_VARIABLES_TABLE was populated
    table_len = len(cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE))
    assert table_len == 2

    # Run step 7 / 8
    process_variables.process(dataset='unsampled',
                              in_table_name='SAS_UNSAMPLED_OOH_DATA',
                              out_table_name='SAS_UNSAMPLED_OOH_PV',
                              in_id='REC_ID')

    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["pv_table"]))
    assert table_len == 1252

    # Run step 8 / 12
    idm.update_step_data_with_step_pv_output(conn, STEP_CONFIGURATION[STEP_NAME])

    # Assert the following tables were cleansed
    deleted_tables = [STEP_CONFIGURATION[STEP_NAME]["pv_table"],
                      STEP_CONFIGURATION[STEP_NAME]["temp_table"],
                      idm.SAS_PROCESS_VARIABLES_TABLE,
                      STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"]]

    for table in deleted_tables:
        table_len = len(cf.get_table_values(table))
        assert table_len == 0

    # Get and test Survey data input
    sas_survey_data = cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    sas_survey_data.to_csv(TEST_DATA_DIR + '\survey_data_in_actual.csv', index=False)

    df_survey_actual = pd.read_csv(TEST_DATA_DIR + '\survey_data_in_actual.csv', engine='python')
    df_survey_target = pd.read_csv(TEST_DATA_DIR + '\survey_data_in_target.csv', engine='python')

    df_survey_actual = sort_and_set_index(df_survey_actual,'SERIAL')
    df_survey_target = sort_and_set_index(df_survey_target,'SERIAL')

    # Drop the EXPENDCODE columns because of format issue
    df_check_a = df_survey_actual.drop(columns=['EXPENDCODE'])
    df_check_t = df_survey_target.drop(columns=['EXPENDCODE'])#[['UNSAMP_PORT_GRP_PV', 'UNSAMP_REGION_GRP_PV']]

    assert_frame_equal(df_check_a, df_check_t, check_dtype=False)

    # Get and test Unsampled data input
    sas_unsampled_data = cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["data_table"])

    sas_unsampled_data.to_csv(TEST_DATA_DIR + r'\unsampled_data_in_actual.csv', index=False)

    df_unsampled_actual = pd.read_csv(TEST_DATA_DIR + r'\unsampled_data_in_actual.csv', engine='python')
    df_unsampled_target = pd.read_csv(TEST_DATA_DIR + r'\unsampled_data_in_target.csv', engine='python')

    df_unsampled_actual = sort_and_set_index(df_unsampled_actual, ['PORTROUTE', 'REGION', 'ARRIVEDEPART', 'UNSAMP_TOTAL'])
    df_unsampled_target = sort_and_set_index(df_unsampled_target, ['PORTROUTE', 'REGION', 'ARRIVEDEPART', 'UNSAMP_TOTAL'])

    # Drop unique REC_ID column
    df_unsampled_test = df_unsampled_actual.drop('REC_ID', axis=1)

    # Fix format of comparison data
    df_unsampled_test['REGION'] = df_unsampled_test['REGION'].replace(0, np.NaN)
    df_unsampled_target['UNSAMP_REGION_GRP_PV'] = df_unsampled_target['UNSAMP_REGION_GRP_PV'].fillna(0)
    df_unsampled_target['UNSAMP_REGION_GRP_PV'] = df_unsampled_target['UNSAMP_REGION_GRP_PV'].astype(int)

    assert_frame_equal(df_unsampled_test, df_unsampled_target, check_dtype=False)

    # TODO: Compare integration summary input with xml summary input
    df_unsampled_actual.to_csv(r'S:\CASPA\IPS\Testing\scratch\summary_in_xml.csv', index=False)

    # Run step 9 / 12
    output_data, summary_data = do_ips_unsampled_weight_calculation(df_survey_actual,
                                                                    serial_num='SERIAL',
                                                                    shift_weight='SHIFT_WT',
                                                                    nr_weight='NON_RESPONSE_WT',
                                                                    min_weight='MINS_WT',
                                                                    traffic_weight='TRAFFIC_WT',
                                                                    out_of_hours_weight="UNSAMP_TRAFFIC_WT",
                                                                    df_ustotals=df_unsampled_actual,
                                                                    min_count_threshold=30)

    # Sort and reset the index of the results produced by the calculation
    output_data = sort_and_set_index(output_data, 'SERIAL')
    summary_data = sort_and_set_index(summary_data, ['UNSAMP_PORT_GRP_PV','UNSAMP_REGION_GRP_PV','ARRIVEDEPART'])

    # Import the expected results, then sort and reset their index
    test_result_survey = pd.read_csv(TEST_DATA_DIR + r'\outputdata_final.csv', engine='python')
    cf.delete_from_table(STEP_CONFIGURATION[STEP_NAME]["temp_table"])
    test_result_survey = convert_dataframe_to_sql_format(STEP_CONFIGURATION[STEP_NAME]["temp_table"], test_result_survey)
    test_result_survey = sort_and_set_index(test_result_survey, 'SERIAL')

    test_result_summary = pd.read_csv(TEST_DATA_DIR + r'\summarydata_final.csv', engine='python')
    cf.delete_from_table(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"])
    test_result_summary = convert_dataframe_to_sql_format(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"], test_result_summary)

    test_result_summary.ARRIVEDEPART = test_result_summary.ARRIVEDEPART.astype(int)
    test_result_summary.UNSAMP_REGION_GRP_PV = pd.to_numeric(test_result_summary.UNSAMP_REGION_GRP_PV, errors='coerce')
    test_result_summary.CASES = test_result_summary.CASES.astype(int)

    test_result_summary = sort_and_set_index(test_result_summary, ['UNSAMP_PORT_GRP_PV','UNSAMP_REGION_GRP_PV','ARRIVEDEPART'])

    # Assert dfs are equal
    assert_frame_equal(output_data, test_result_survey, check_dtype=False, check_like=True,
                       check_less_precise=True)

    assert_frame_equal(summary_data, test_result_summary, check_dtype=False, check_like=True,
                       check_less_precise=True)

    # Put the SQL data back in for the remaining steps
    cf.delete_from_table(STEP_CONFIGURATION[STEP_NAME]["temp_table"])
    cf.delete_from_table(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"])
    cf.insert_dataframe_into_table(STEP_CONFIGURATION[STEP_NAME]["temp_table"], output_data)
    cf.insert_dataframe_into_table(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"], summary_data)

    # Check the number of records in the output tables are correct
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["temp_table"]))
    assert table_len == EXPECTED_LEN

    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"]))
    assert table_len == 203

    # Run step 10 / 12
    idm.update_survey_data_with_step_results(conn, STEP_CONFIGURATION[STEP_NAME])

    # Check record count in the
    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == EXPECTED_LEN

    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["temp_table"]))
    assert table_len == 0

    # Run step 11 / 12
    idm.store_survey_data_with_step_results(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Assert SURVEY_SUBSAMPLE_TABLE was populated
    result = cf.select_data('*', idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', RUN_ID)
    table_len = result.shape[0]
    assert table_len == 17731

    # Assert all records for corresponding run_id were deleted from ps_table.
    result = cf.select_data('*', STEP_CONFIGURATION[STEP_NAME]["ps_table"], 'RUN_ID', RUN_ID)
    # Indicating no dataframe was pulled from SQL.
    if not result:
        assert True

    # Assert SAS_SURVEY_SUBSAMPLE_TABLE was cleansed
    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == 0

    # Run step 12 / 12
    idm.store_step_summary(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Assert summary was populated.
    result = cf.select_data('*', STEP_CONFIGURATION[STEP_NAME]["ps_table"], 'RUN_ID', RUN_ID)
    table_len = result.shape[0]
    assert table_len == 203

    # Assert temp table was cleansed
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"]))
    assert table_len == 0