def testDeleteInstancesWithMissing2(self):
        # Renames "N1" to "gender", calls
        # delete_all_instances_without_header_data for each attribute column,
        # converts the string columns and the class, then checks the
        # enumerator's headers, class label, features, phenotypes and map.
        csv_path = os.path.join(
            THIS_DIR, "test/DataSets/Tests/StringData.csv")
        se = StringEnumerator(csv_path, "phenotype")
        se.change_header_name("N1", "gender")
        for header in ("gender", "N2", "N3"):
            se.delete_all_instances_without_header_data(header)

        se.add_attribute_converter_random("gender")
        se.add_attribute_converter_random("N3")
        se.add_class_converter_random()
        se.convert_all_attributes()

        # Expected state of the enumerator after deletion and conversion.
        expected_headers = np.array(["gender", "N2", "N3"])
        expected_map = {
            "phenotype": {
                "china": "0",
            },
            "gender": {
                "male": "0",
                "female": "1",
            },
            "N3": {
                "young": "0",
                "old": "1",
            },
        }
        expected_features = np.array([["0", "1.2", "0"], ["1", "-0.4", "1"]])
        expected_phenotypes = np.array(["0", "0"])

        self.assertTrue(np.array_equal(expected_headers, se.dataHeaders))
        # assertEqual (rather than assertTrue on a boolean) so that a failure
        # reports both the expected and the actual value.
        self.assertEqual("phenotype", se.classLabel)
        self.assertTrue(np.array_equal(expected_features, se.dataFeatures))
        self.assertTrue(
            np.array_equal(expected_phenotypes, se.dataPhenotypes))
        self.assertEqual(se.map, expected_map)
    def testNumericCheck(self):
        # check_is_full_numeric is asserted False right after loading
        # StringData.csv and again after converting only "N1"; it is asserted
        # True once "N3" and the class have been converted as well.
        string_csv = os.path.join(
            THIS_DIR, "test/DataSets/Tests/StringData.csv")
        string_enum = StringEnumerator(string_csv, "phenotype")
        self.assertFalse(string_enum.check_is_full_numeric())

        string_enum.add_attribute_converter_random("N1")
        string_enum.convert_all_attributes()
        self.assertFalse(string_enum.check_is_full_numeric())

        string_enum.add_attribute_converter_random("N3")
        string_enum.add_class_converter_random()
        string_enum.convert_all_attributes()
        self.assertTrue(string_enum.check_is_full_numeric())

        # A second enumerator built from MissingFeatureData.csv is asserted
        # to be fully numeric without any conversion step.
        missing_csv = os.path.join(
            THIS_DIR, "test/DataSets/Tests/MissingFeatureData.csv")
        missing_enum = StringEnumerator(missing_csv, "phenotype")
        self.assertTrue(missing_enum.check_is_full_numeric())
 def testget_params2(self):
     # Renames every header and the class label, registers explicit
     # converters for "gender" and "age" plus a random class converter,
     # converts, and then checks the four values returned by get_params.
     source_csv = os.path.join(
         THIS_DIR, "test/DataSets/Tests/StringData.csv")
     enumerator = StringEnumerator(source_csv, "phenotype")
     renames = (("N1", "gender"), ("N2", "floats"), ("N3", "age"))
     for old_name, new_name in renames:
         enumerator.change_header_name(old_name, new_name)
     enumerator.change_class_name("country")

     gender_values = np.array(["female", "male", "NA", "other"])
     enumerator.add_attribute_converter("gender", gender_values)
     age_values = np.array(["old", "young"])
     enumerator.add_attribute_converter("age", age_values)
     enumerator.add_class_converter_random()
     enumerator.convert_all_attributes()

     headers, label, features, phenotypes = enumerator.get_params()

     # Expected results; NaN entries are compared with equal_nan=True below.
     nan = np.nan
     expected_headers = np.array(["gender", "floats", "age"])
     expected_features = np.array([
         [1, 1.2, 1],
         [0, 0.3, nan],
         [0, -0.4, 0],
         [nan, 0, 1],
     ])
     expected_phenotypes = np.array([0, 1, 0, 2])

     self.assertEqual("country", label)
     self.assertTrue(np.array_equal(expected_headers, headers))
     features_match = np.allclose(
         expected_features, features, equal_nan=True)
     self.assertTrue(features_match)
     phenotypes_match = np.allclose(
         expected_phenotypes, phenotypes, equal_nan=True)
     self.assertTrue(phenotypes_match)