def teardown_module(module):
    # Delete any previous records from the Survey_Subsample tables for the given run ID
    ctf.reset_test_tables(RUN_ID, STEP_CONFIGURATION[STEP_NAME])

    # Cleanse the Survey Subsample table
    cf.delete_from_table(idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', '=', RUN_ID)

    print("Teardown")
def teardown_module(module): """ Teardown any state that was previously setup with a setup_module method. """ # Deletes data from temporary tables as necessary. ctf.reset_test_tables(RUN_ID, STEP_CONFIGURATION[STEP_NAME]) # Cleanses Survey Subsample table. cf.delete_from_table(idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', '=', RUN_ID) print("Duration: {}".format(time.strftime("%H:%M:%S", time.gmtime(time.time() - START_TIME))))
def reset_tables():
    """
    Author     : Thomas Mahoney
    Date       : 7 Sep 2018
    Purpose    : Deletes records from tables associated with the data import test.
    Parameters : NA
    Returns    : NA
    """
    print("Deleting records from tables associated with the data import test...")

    tables_to_delete_run_id = [
        idm.SURVEY_SUBSAMPLE_TABLE,
        "TRAFFIC_DATA",
        "SHIFT_DATA",
        "NON_RESPONSE_DATA",
        "UNSAMPLED_OOH_DATA",
    ]

    # Delete the records for the base run ID and each monthly/quarterly variant used by the test
    for table in tables_to_delete_run_id:
        cf.delete_from_table(table, 'RUN_ID', '=', RUN_ID)
        cf.delete_from_table(table, 'RUN_ID', '=', RUN_ID + "_OCTOBER_2017")
        cf.delete_from_table(table, 'RUN_ID', '=', RUN_ID + "_NOVEMBER_2017")
        cf.delete_from_table(table, 'RUN_ID', '=', RUN_ID + "_DECEMBER_2017")
        cf.delete_from_table(table, 'RUN_ID', '=', RUN_ID + "_Q3_2017")

    tables_to_delete_all = [
        'SAS_SURVEY_SUBSAMPLE',
        'SAS_SHIFT_DATA',
        'SAS_NON_RESPONSE_DATA',
        'SAS_TRAFFIC_DATA',
        'SAS_UNSAMPLED_OOH_DATA',
    ]

    # Clear the SAS working tables completely
    for table in tables_to_delete_all:
        cf.delete_from_table(table)

    print("Import table test records deleted.")
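# The step tests below call two small helpers that are defined elsewhere in the
# test suite: sort_and_set_index and convert_dataframe_to_sql_format. The sketches
# here are illustrative only (assumed signatures inferred from how the tests use
# them, not the canonical implementations) and rely only on cf functions already
# used in this module.

def sort_and_set_index(df, columns):
    # Assumed behaviour: sort by the given key column(s) and rebuild a
    # contiguous 0..n-1 index so dataframes can be compared row by row
    df = df.sort_values(by=columns)
    df.index = range(0, len(df))
    return df


def convert_dataframe_to_sql_format(table_name, dataframe):
    # Assumed behaviour: insert the dataframe into SQL and read it straight back,
    # so expected CSV output is typed the same way as data pulled from the server
    cf.insert_dataframe_into_table(table_name, dataframe)
    return cf.get_table_values(table_name)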
def test_non_response_weight_step(path_to_data):
    # Get database connection
    conn = database_connection()

    # Run step 1
    idm.populate_survey_data_for_step(RUN_ID, conn, step_config)

    # ###########################
    # run checks 1
    # ###########################

    # Check all deleted tables are empty
    for table in step_config['delete_tables']:
        delete_result = cf.get_table_values(table)
        assert delete_result.empty

    # Check all nullified columns are NULL
    for column in step_config['nullify_pvs']:
        column_name = column.replace('[', '').replace(']', '')
        result = cf.select_data(column_name, idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', RUN_ID)
        assert result[column_name].isnull().sum() == len(result)

    # Check table has been populated
    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == EXPECTED_LEN

    # Run step 2
    idm.populate_step_data(RUN_ID, conn, step_config)

    # ###########################
    # run checks 2
    # ###########################

    # Check table has been populated
    table_len = len(cf.get_table_values(step_config["data_table"]))
    assert table_len == NON_RESPONSE_DATA_LENGTH

    # Run step 3
    idm.copy_step_pvs_for_survey_data(RUN_ID, conn, step_config)

    # ###########################
    # run checks 3
    # ###########################

    # Get all values from the sas_process_variables table
    results = cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE)

    # Check number of PV records moved matches number passed in through step configuration
    assert len(results) == len(step_config['pv_columns'])

    # Get the spv_table values and ensure all records have been deleted
    results = cf.get_table_values(step_config['spv_table'])
    assert len(results) == 0

    # Run step 4 : Apply Non Response Wt PVs On Survey Data
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_NON_RESPONSE_SPV',
                              in_id='serial')

    # ###########################
    # run checks 4
    # ###########################

    table_len = len(cf.get_table_values(step_config["spv_table"]))
    assert table_len == EXPECTED_LEN

    # Run step 5 : Update Survey Data with Non Response Wt PVs Output
    idm.update_survey_data_with_step_pv_output(conn, step_config)

    # ###########################
    # run checks 5
    # ###########################

    # Check all columns in SAS_SURVEY_SUBSAMPLE have been altered
    result = cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    for column in step_config['pv_columns']:
        column_name = column.replace("'", "")
        assert len(result[column_name]) == EXPECTED_LEN
        assert result[column_name].sum() != 0

    # Assert SAS_PROCESS_VARIABLES_TABLE has been cleansed
    table_len = len(cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE))
    assert table_len == 0

    # Assert spv_table has been cleansed
    table_len = len(cf.get_table_values(step_config["spv_table"]))
    assert table_len == 0

    # Run step 6 : Copy Non Response Wt PVs for Non Response Data
    idm.copy_step_pvs_for_step_data(RUN_ID, conn, step_config)

    # ###########################
    # run checks 6
    # ###########################

    # Assert pv_table has been cleansed
    table_len = len(cf.get_table_values(step_config["pv_table"]))
    assert table_len == 0

    # Assert SAS_PROCESS_VARIABLES_TABLE was populated
    table_len = len(cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE))
    assert table_len == NON_RESPONSE_SAS_PROCESS_VARIABLE_TABLE_LENGTH

    # Run step 7 : Apply Non Response Wt PVs On Non Response Data
    process_variables.process(dataset='non_response',
                              in_table_name='SAS_NON_RESPONSE_DATA',
                              out_table_name='SAS_NON_RESPONSE_PV',
                              in_id='REC_ID')

    # ###########################
    # run checks 7
    # ###########################

    table_len = len(cf.get_table_values(step_config["pv_table"]))
    assert table_len == NON_RESPONSE_DATA_LENGTH

    # Run step 8 : Update NonResponse Data With PVs Output
    idm.update_step_data_with_step_pv_output(conn, step_config)

    # ###########################
    # run checks 8
    # ###########################

    # Assert data table was populated
    table_len = len(cf.get_table_values(step_config["data_table"]))
    assert table_len == NON_RESPONSE_DATA_LENGTH

    # Assert the following tables were cleansed
    deleted_tables = [step_config["pv_table"],
                      step_config["temp_table"],
                      idm.SAS_PROCESS_VARIABLES_TABLE,
                      step_config["sas_ps_table"]]

    for table in deleted_tables:
        table_len = len(cf.get_table_values(table))
        assert table_len == 0

    # ##############################
    # Calculate Non Response Weight
    # ##############################

    # Import the data from SQL and sort
    df_surveydata_import_actual = cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    df_surveydata_import_actual_sql = df_surveydata_import_actual.sort_values(by='SERIAL')
    df_surveydata_import_actual_sql.index = range(0, len(df_surveydata_import_actual_sql))

    df_nr_data_import_actual = cf.get_table_values(SAS_NON_RESPONSE_DATA_TABLE_NAME)

    # Fix formatting in actual data
    df_surveydata_import_actual_sql.drop(['EXPENDCODE'], axis=1, inplace=True)
    df_surveydata_import_actual_sql['SHIFT_PORT_GRP_PV'] = \
        df_surveydata_import_actual_sql['SHIFT_PORT_GRP_PV'].apply(pd.to_numeric, errors='coerce')

    # Do the calculation step
    result_py_data = non_resp.do_ips_nrweight_calculation(df_surveydata_import_actual_sql,
                                                          df_nr_data_import_actual,
                                                          'NON_RESPONSE_WT',
                                                          'SERIAL')

    # ###########################
    # run checks
    # ###########################

    # Retrieve and sort python calculated dataframes
    py_survey_data = result_py_data[0]
    py_survey_data = py_survey_data.sort_values(by='SERIAL')
    py_survey_data.index = range(0, len(py_survey_data))

    py_summary_data = result_py_data[1]
    py_summary_data = py_summary_data.sort_values(by=NR_COLUMNS)
    py_summary_data[NR_COLUMNS] = py_summary_data[NR_COLUMNS].apply(pd.to_numeric,
                                                                    errors='coerce',
                                                                    downcast='float')
    py_summary_data.index = range(0, len(py_summary_data))

    # Insert the csv output data into SQL and read it back; this is for testing
    # against data pulled from SQL Server
    test_result_survey = pd.read_csv(path_to_data + '/outputdata_final.csv', engine='python')
    cf.delete_from_table(OUT_TABLE_NAME)
    test_result_survey_sql = convert_dataframe_to_sql_format(OUT_TABLE_NAME, test_result_survey)
    test_result_survey_sql = test_result_survey_sql.sort_values(by='SERIAL')
    test_result_survey_sql.index = range(0, len(test_result_survey_sql))

    test_result_summary = pd.read_csv(path_to_data + '/summarydata_final.csv', engine='python')
    cf.delete_from_table(SUMMARY_OUT_TABLE_NAME)
    test_result_summary_sql = convert_dataframe_to_sql_format(SUMMARY_OUT_TABLE_NAME, test_result_summary)
    test_result_summary_sql = test_result_summary_sql.sort_values(by=NR_COLUMNS)
    test_result_summary_sql[NR_COLUMNS] = test_result_summary_sql[NR_COLUMNS].apply(pd.to_numeric,
                                                                                    errors='coerce',
                                                                                    downcast='float')
    test_result_summary_sql.index = range(0, len(test_result_summary_sql))

    # Assert dfs are equal
    assert_frame_equal(py_survey_data, test_result_survey_sql, check_dtype=False,
                       check_like=True, check_less_precise=True)
    assert_frame_equal(py_summary_data, test_result_summary_sql, check_dtype=False,
                       check_like=True, check_less_precise=True)

    # Put the actual SQL data back in for the remaining steps
    cf.delete_from_table(OUT_TABLE_NAME)
    cf.delete_from_table(SUMMARY_OUT_TABLE_NAME)

    cf.insert_dataframe_into_table(OUT_TABLE_NAME, py_survey_data)
    cf.insert_dataframe_into_table(SUMMARY_OUT_TABLE_NAME, py_summary_data)

    # Update Survey Data With Non Response Wt Results
    idm.update_survey_data_with_step_results(conn, step_config)

    # ###########################
    # run checks 9
    # ###########################

    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == EXPECTED_LEN

    table_len = len(cf.get_table_values(step_config["temp_table"]))
    assert table_len == 0

    # Store Survey Data With NonResponse Wt Results
    idm.store_survey_data_with_step_results(RUN_ID, conn, step_config)

    # ###########################
    # run checks 10
    # ###########################

    # Assert SURVEY_SUBSAMPLE_TABLE was populated
    result = cf.select_data('*', idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', RUN_ID)
    table_len = result.shape[0]
    assert table_len == SURVEY_SUBSAMPLE_LENGTH

    # Assert all records for the corresponding run_id were deleted from ps_table
    result = cf.select_data('*', step_config["ps_table"], 'RUN_ID', RUN_ID)
    # Indicating no dataframe was pulled from SQL
    if not result:
        assert True

    # Assert SAS_SURVEY_SUBSAMPLE_TABLE was cleansed
    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == 0

    # Store Non Response Wt Summary
    idm.store_step_summary(RUN_ID, conn, step_config)

    # ###########################
    # run checks 11
    # ###########################

    # Assert summary was populated
    result = cf.select_data('*', step_config["ps_table"], 'RUN_ID', RUN_ID)
    table_len = result.shape[0]
    assert table_len == 207

    # Assert temp table was cleansed
    table_len = len(cf.get_table_values(step_config["sas_ps_table"]))
    assert table_len == 0
def test_unsampled_weight_step():
    # Get database connection
    conn = database_connection()

    # Run step 1 / 12
    idm.populate_survey_data_for_step(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Check all deleted tables are empty
    for table in STEP_CONFIGURATION[STEP_NAME]['delete_tables']:
        delete_result = cf.get_table_values(table)
        assert delete_result.empty

    # Check all nullified columns are NULL
    for column in STEP_CONFIGURATION[STEP_NAME]['nullify_pvs']:
        column_name = column.replace('[', '').replace(']', '')
        result = cf.select_data(column_name, idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', RUN_ID)
        assert result[column_name].isnull().sum() == len(result)

    # Check table has been populated
    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == EXPECTED_LEN

    # Run step 2 / 12
    idm.populate_step_data(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Check table has been populated
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["data_table"]))
    assert table_len == 1252

    # Run step 3 / 12
    idm.copy_step_pvs_for_survey_data(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Assert idm.SAS_PROCESS_VARIABLES_TABLE has been populated
    table_len = len(cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE))
    assert table_len == NUMBER_OF_PVS

    # Assert STEP_CONFIGURATION[STEP_NAME]["spv_table"] has been cleansed
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["spv_table"]))
    assert table_len == 0

    # Run step 4 / 12
    process_variables.process(dataset='survey',
                              in_table_name='SAS_SURVEY_SUBSAMPLE',
                              out_table_name='SAS_UNSAMPLED_OOH_SPV',
                              in_id='serial')

    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["spv_table"]))
    assert table_len == EXPECTED_LEN

    # Run step 5 / 12
    idm.update_survey_data_with_step_pv_output(conn, STEP_CONFIGURATION[STEP_NAME])

    # Check all columns in SAS_SURVEY_SUBSAMPLE have been altered
    result = cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    for column in STEP_CONFIGURATION[STEP_NAME]['pv_columns']:
        column_name = column.replace("'", "")
        assert len(result[column_name]) == EXPECTED_LEN

    # Assert SAS_PROCESS_VARIABLES_TABLE has been cleansed
    table_len = len(cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE))
    assert table_len == 0

    # Assert spv_table has been cleansed
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["spv_table"]))
    assert table_len == 0

    # Run step 6 / 12
    idm.copy_step_pvs_for_step_data(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Assert pv_table has been cleansed
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["pv_table"]))
    assert table_len == 0

    # Assert SAS_PROCESS_VARIABLES_TABLE was populated
    table_len = len(cf.get_table_values(idm.SAS_PROCESS_VARIABLES_TABLE))
    assert table_len == 2

    # Run step 7 / 12
    process_variables.process(dataset='unsampled',
                              in_table_name='SAS_UNSAMPLED_OOH_DATA',
                              out_table_name='SAS_UNSAMPLED_OOH_PV',
                              in_id='REC_ID')

    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["pv_table"]))
    assert table_len == 1252

    # Run step 8 / 12
    idm.update_step_data_with_step_pv_output(conn, STEP_CONFIGURATION[STEP_NAME])

    # Assert the following tables were cleansed
    deleted_tables = [STEP_CONFIGURATION[STEP_NAME]["pv_table"],
                      STEP_CONFIGURATION[STEP_NAME]["temp_table"],
                      idm.SAS_PROCESS_VARIABLES_TABLE,
                      STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"]]

    for table in deleted_tables:
        table_len = len(cf.get_table_values(table))
        assert table_len == 0

    # Get and test Survey data input
    sas_survey_data = cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE)
    sas_survey_data.to_csv(TEST_DATA_DIR + r'\survey_data_in_actual.csv', index=False)

    df_survey_actual = pd.read_csv(TEST_DATA_DIR + r'\survey_data_in_actual.csv', engine='python')
    df_survey_target = pd.read_csv(TEST_DATA_DIR + r'\survey_data_in_target.csv', engine='python')

    df_survey_actual = sort_and_set_index(df_survey_actual, 'SERIAL')
    df_survey_target = sort_and_set_index(df_survey_target, 'SERIAL')

    # Drop the EXPENDCODE columns because of a format issue
    df_check_a = df_survey_actual.drop(columns=['EXPENDCODE'])
    df_check_t = df_survey_target.drop(columns=['EXPENDCODE'])

    assert_frame_equal(df_check_a, df_check_t, check_dtype=False)

    # Get and test Unsampled data input
    sas_unsampled_data = cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["data_table"])
    sas_unsampled_data.to_csv(TEST_DATA_DIR + r'\unsampled_data_in_actual.csv', index=False)

    df_unsampled_actual = pd.read_csv(TEST_DATA_DIR + r'\unsampled_data_in_actual.csv', engine='python')
    df_unsampled_target = pd.read_csv(TEST_DATA_DIR + r'\unsampled_data_in_target.csv', engine='python')

    df_unsampled_actual = sort_and_set_index(df_unsampled_actual,
                                             ['PORTROUTE', 'REGION', 'ARRIVEDEPART', 'UNSAMP_TOTAL'])
    df_unsampled_target = sort_and_set_index(df_unsampled_target,
                                             ['PORTROUTE', 'REGION', 'ARRIVEDEPART', 'UNSAMP_TOTAL'])

    # Drop unique REC_ID column
    df_unsampled_test = df_unsampled_actual.drop('REC_ID', axis=1)

    # Fix format of comparison data
    df_unsampled_test['REGION'] = df_unsampled_test['REGION'].replace(0, np.NaN)
    df_unsampled_target['UNSAMP_REGION_GRP_PV'] = df_unsampled_target['UNSAMP_REGION_GRP_PV'].fillna(0)
    df_unsampled_target['UNSAMP_REGION_GRP_PV'] = df_unsampled_target['UNSAMP_REGION_GRP_PV'].astype(int)

    assert_frame_equal(df_unsampled_test, df_unsampled_target, check_dtype=False)

    # TODO: Compare integration summary input with xml summary input
    df_unsampled_actual.to_csv(r'S:\CASPA\IPS\Testing\scratch\summary_in_xml.csv', index=False)

    # Run step 9 / 12
    output_data, summary_data = do_ips_unsampled_weight_calculation(df_survey_actual,
                                                                    serial_num='SERIAL',
                                                                    shift_weight='SHIFT_WT',
                                                                    nr_weight='NON_RESPONSE_WT',
                                                                    min_weight='MINS_WT',
                                                                    traffic_weight='TRAFFIC_WT',
                                                                    out_of_hours_weight="UNSAMP_TRAFFIC_WT",
                                                                    df_ustotals=df_unsampled_actual,
                                                                    min_count_threshold=30)

    # Sort and reset the index of the results produced by the calculation
    output_data = sort_and_set_index(output_data, 'SERIAL')
    summary_data = sort_and_set_index(summary_data,
                                      ['UNSAMP_PORT_GRP_PV', 'UNSAMP_REGION_GRP_PV', 'ARRIVEDEPART'])

    # Import the expected results, then sort and reset their index
    test_result_survey = pd.read_csv(TEST_DATA_DIR + r'\outputdata_final.csv', engine='python')
    cf.delete_from_table(STEP_CONFIGURATION[STEP_NAME]["temp_table"])
    test_result_survey = convert_dataframe_to_sql_format(STEP_CONFIGURATION[STEP_NAME]["temp_table"],
                                                         test_result_survey)
    test_result_survey = sort_and_set_index(test_result_survey, 'SERIAL')

    test_result_summary = pd.read_csv(TEST_DATA_DIR + r'\summarydata_final.csv', engine='python')
    cf.delete_from_table(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"])
    test_result_summary = convert_dataframe_to_sql_format(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"],
                                                          test_result_summary)
    test_result_summary.ARRIVEDEPART = test_result_summary.ARRIVEDEPART.astype(int)
    test_result_summary.UNSAMP_REGION_GRP_PV = pd.to_numeric(test_result_summary.UNSAMP_REGION_GRP_PV,
                                                             errors='coerce')
    test_result_summary.CASES = test_result_summary.CASES.astype(int)
    test_result_summary = sort_and_set_index(test_result_summary,
                                             ['UNSAMP_PORT_GRP_PV', 'UNSAMP_REGION_GRP_PV', 'ARRIVEDEPART'])

    # Assert dfs are equal
    assert_frame_equal(output_data, test_result_survey, check_dtype=False,
                       check_like=True, check_less_precise=True)
    assert_frame_equal(summary_data, test_result_summary, check_dtype=False,
                       check_like=True, check_less_precise=True)

    # Put the SQL data back in for the remaining steps
    cf.delete_from_table(STEP_CONFIGURATION[STEP_NAME]["temp_table"])
    cf.delete_from_table(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"])

    cf.insert_dataframe_into_table(STEP_CONFIGURATION[STEP_NAME]["temp_table"], output_data)
    cf.insert_dataframe_into_table(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"], summary_data)

    # Check the number of records in the output tables is correct
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["temp_table"]))
    assert table_len == EXPECTED_LEN

    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"]))
    assert table_len == 203

    # Run step 10 / 12
    idm.update_survey_data_with_step_results(conn, STEP_CONFIGURATION[STEP_NAME])

    # Check record counts in the survey and temp tables
    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == EXPECTED_LEN

    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["temp_table"]))
    assert table_len == 0

    # Run step 11 / 12
    idm.store_survey_data_with_step_results(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Assert SURVEY_SUBSAMPLE_TABLE was populated
    result = cf.select_data('*', idm.SURVEY_SUBSAMPLE_TABLE, 'RUN_ID', RUN_ID)
    table_len = result.shape[0]
    assert table_len == 17731

    # Assert all records for the corresponding run_id were deleted from ps_table
    result = cf.select_data('*', STEP_CONFIGURATION[STEP_NAME]["ps_table"], 'RUN_ID', RUN_ID)
    # Indicating no dataframe was pulled from SQL
    if not result:
        assert True

    # Assert SAS_SURVEY_SUBSAMPLE_TABLE was cleansed
    table_len = len(cf.get_table_values(idm.SAS_SURVEY_SUBSAMPLE_TABLE))
    assert table_len == 0

    # Run step 12 / 12
    idm.store_step_summary(RUN_ID, conn, STEP_CONFIGURATION[STEP_NAME])

    # Assert summary was populated
    result = cf.select_data('*', STEP_CONFIGURATION[STEP_NAME]["ps_table"], 'RUN_ID', RUN_ID)
    table_len = result.shape[0]
    assert table_len == 203

    # Assert temp table was cleansed
    table_len = len(cf.get_table_values(STEP_CONFIGURATION[STEP_NAME]["sas_ps_table"]))
    assert table_len == 0