def test_apply_arabic_rules_function(self):
    '''
    Check that apply_arabic_rules rejects unknown target columns.

    The mock dataset is loaded and the function is called with column
    names the test treats as absent from it; a ValueError is expected.
    '''
    mock_df = import_dataset(mock_filename)
    missing_cols = ['not_a_col', 'not_a_col_either']

    # The call itself must raise; returning normally fails the test.
    with self.assertRaises(ValueError):
        apply_arabic_rules(mock_df, arabic_rules_dict, missing_cols)
def test_arabic_col_cleaner_function(self):
    '''
    Check that arabic_col_cleaner rejects unknown columns.

    The mock dataset is loaded and the function is called with column
    names the test treats as absent from it; a ValueError is expected.
    '''
    mock_df = import_dataset(mock_filename)
    missing_cols = ['not_a_col', 'not_a_col_either']

    # The call itself must raise; returning normally fails the test.
    with self.assertRaises(ValueError):
        arabic_col_cleaner(mock_df, missing_cols)
def test_select_features_for_classification_function(self):
    '''
    Check that select_features_for_classification fails on a missing file.

    Two paths that the test treats as non-existent are tried, one bare and
    one under 'inputs/'; each call is expected to raise an IOError.
    '''
    mock_df = import_dataset(mock_filename)

    # Bare file name.
    with self.assertRaises(IOError):
        select_features_for_classification(mock_df, 'not_a_file')

    # File name inside a sub-directory.
    with self.assertRaises(IOError):
        select_features_for_classification(
            mock_df, 'inputs/not_a_file_either')
def test_create_hdf_file_function(self):
    '''
    Check that create_hdf_file rejects column names missing from the df.

    The function is called with one, two and three positional column
    arguments; in each call the last name given is one the test treats as
    absent from the mock dataframe, and a ValueError is expected.
    '''
    mock_df = import_dataset(mock_filename)

    # Bad first column argument.
    with self.assertRaises(ValueError):
        create_hdf_file(mock_df, 'hash_1')

    # Bad second column argument.
    with self.assertRaises(ValueError):
        create_hdf_file(mock_df, 'id_1', 'hash_2')

    # Bad third column argument.
    with self.assertRaises(ValueError):
        create_hdf_file(mock_df, 'id_1', 'id_2', 'xgb_prob')
def test_run_classification_function(self):
    '''
    Check the error behaviour of run_classification.

    Paths the test treats as non-existent model files must raise an
    IOError, and calling the function with the dataframe alone must raise
    a TypeError.
    '''
    mock_df = import_dataset(mock_filename)

    # Bare file name.
    with self.assertRaises(IOError):
        run_classification(mock_df, 'not_a_file')

    # File name inside a sub-directory.
    with self.assertRaises(IOError):
        run_classification(mock_df, 'inputs/not_a_file_either')

    # No model file argument supplied at all.
    with self.assertRaises(TypeError):
        run_classification(mock_df)
def test_df_column_cleaner_function(self):
    '''
    Check df_column_cleaner's error handling and its output columns.

    First, four calls each replace exactly one of the four column-name
    lists with names the test treats as missing from the mock dataframe;
    every such call must raise a ValueError. Then a call with the regular
    column names is made and the column names of its result are compared,
    ignoring order, with the expected ones.
    '''
    mock_df = import_dataset(mock_filename)

    # Wrong names in the first list.
    with self.assertRaises(ValueError):
        df_column_cleaner(
            mock_df,
            ['hash_1', 'hash_2'],
            ['match_1', 'match_2'],
            ['date_of_death_1', 'date_of_death_2'],
            ['location_1', 'location_2'])

    # Wrong names in the second list.
    with self.assertRaises(ValueError):
        df_column_cleaner(
            mock_df,
            ['id_1', 'id_2'],
            ['matching_1', 'matching_2'],
            ['date_of_death_1', 'date_of_death_2'],
            ['location_1', 'location_2'])

    # Wrong names in the third list.
    with self.assertRaises(ValueError):
        df_column_cleaner(
            mock_df,
            ['id_1', 'id_2'],
            ['match_1', 'match_2'],
            ['date_of_dying_1', 'date_of_dying_2'],
            ['location_1', 'location_2'])

    # Wrong names in the fourth list.
    with self.assertRaises(ValueError):
        df_column_cleaner(
            mock_df,
            ['id_1', 'id_2'],
            ['match_1', 'match_2'],
            ['date_of_death_1', 'date_of_death_2'],
            ['loc_1', 'loc_2'])

    # Regular call: compare the resulting column names as sets so that
    # their order does not matter.
    wanted_cols = {
        'hash_1', 'hash_2',
        'name_1', 'name_2',
        'date_of_death_1', 'date_of_death_2',
        'location_1', 'location_2',
    }
    cleaned_df = df_column_cleaner(
        mock_df,
        ['id_1', 'id_2'],
        ['match_1', 'match_2'],
        ['date_of_death_1', 'date_of_death_2'],
        ['location_1', 'location_2'])
    produced_cols = set(cleaned_df.columns.values)
    self.assertEqual(wanted_cols, produced_cols)