def testDeleteInstancesWithMissing2(self):
    # Deletes instances and checks arrays for correctness.
    #
    # Flow: load StringData.csv with "phenotype" as the class column, rename
    # header "N1" to "gender", call delete_all_instances_without_header_data
    # for each of the three headers, register converters, convert, and then
    # compare the enumerator's state with the hard-coded expectations below.
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    se = StringEnumerator(dataPath, "phenotype")
    se.change_header_name("N1", "gender")

    se.delete_all_instances_without_header_data("gender")
    se.delete_all_instances_without_header_data("N2")
    se.delete_all_instances_without_header_data("N3")

    # "N2" gets no converter; the expected array below keeps its values as-is.
    se.add_attribute_converter_random("gender")
    se.add_attribute_converter_random("N3")
    se.add_class_converter_random()
    se.convert_all_attributes()

    # Expected state after conversion: two rows remain, all values as strings.
    cHeaders = np.array(["gender", "N2", "N3"])
    cMap = {
        "phenotype": {"china": "0"},
        "gender": {"male": "0", "female": "1"},
        "N3": {"young": "0", "old": "1"},
    }
    cArray = np.array([["0", "1.2", "0"], ["1", "-0.4", "1"]])
    cPArray = np.array(["0", "0"])

    self.assertTrue(np.array_equal(cHeaders, se.dataHeaders))
    # The label is compared against a plain string, so assertEqual is used
    # (as in testget_params2); it reports both values when the check fails.
    self.assertEqual("phenotype", se.classLabel)
    self.assertTrue(np.array_equal(cArray, se.dataFeatures))
    self.assertTrue(np.array_equal(cPArray, se.dataPhenotypes))
    # assertEqual shows the differing entries instead of "False is not true".
    self.assertEqual(se.map, cMap)
def testNumericCheck(self):
    # Checks non missing numeric
    #
    # Exercises check_is_full_numeric() at three points on StringData.csv
    # (before any conversion, after converting only "N1", and after also
    # converting "N3" and the class), then once on MissingFeatureData.csv
    # with no conversion at all.
    string_csv = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    missing_csv = os.path.join(
        THIS_DIR, "test/DataSets/Tests/MissingFeatureData.csv"
    )

    string_enum = StringEnumerator(string_csv, "phenotype")

    # Freshly loaded: expected to report not fully numeric.
    self.assertFalse(string_enum.check_is_full_numeric())

    # Converting "N1" alone is expected to be insufficient.
    string_enum.add_attribute_converter_random("N1")
    string_enum.convert_all_attributes()
    self.assertFalse(string_enum.check_is_full_numeric())

    # After "N3" and the class are converted too, the check should pass.
    string_enum.add_attribute_converter_random("N3")
    string_enum.add_class_converter_random()
    string_enum.convert_all_attributes()
    self.assertTrue(string_enum.check_is_full_numeric())

    # The second data set is expected to pass without any converters.
    missing_enum = StringEnumerator(missing_csv, "phenotype")
    self.assertTrue(missing_enum.check_is_full_numeric())
def testget_params2(self):
    # Get Params Test
    #
    # Renames all three headers and the class, registers explicit converters
    # for "gender" and "age" plus a random class converter, converts, and
    # then checks the four values returned by get_params().

    # Expected results of get_params() after conversion.
    expected_headers = np.array(["gender", "floats", "age"])
    expected_features = np.array(
        [
            [1, 1.2, 1],
            [0, 0.3, np.nan],
            [0, -0.4, 0],
            [np.nan, 0, 1],
        ]
    )
    expected_phenotypes = np.array([0, 1, 0, 2])

    csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    # Apply the header renames in the same order as before: N1, N2, N3.
    header_renames = (("N1", "gender"), ("N2", "floats"), ("N3", "age"))
    for old_name, new_name in header_renames:
        enumerator.change_header_name(old_name, new_name)
    enumerator.change_class_name("country")

    gender_values = np.array(["female", "male", "NA", "other"])
    age_values = np.array(["old", "young"])
    enumerator.add_attribute_converter("gender", gender_values)
    enumerator.add_attribute_converter("age", age_values)
    enumerator.add_class_converter_random()
    enumerator.convert_all_attributes()

    headers, class_label, features, phenotypes = enumerator.get_params()

    self.assertEqual("country", class_label)
    self.assertTrue(np.array_equal(expected_headers, headers))
    # equal_nan=True so the NaN entries in the expected features match.
    self.assertTrue(np.allclose(expected_features, features, equal_nan=True))
    self.assertTrue(
        np.allclose(expected_phenotypes, phenotypes, equal_nan=True)
    )