def testDeleteNonexistentAttribute(self):
     """Deleting a header that is not in the data set must raise.

     Loads StringData.csv with "phenotype" as the class column, asks the
     enumerator to delete attribute "N", and expects an exception whose
     message contains "Header Doesn't Exist".
     """
     dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     se = StringEnumerator(dataPath, "phenotype")
     with self.assertRaises(Exception) as context:
         se.delete_attribute("N")
     # assertIn prints the actual message when the substring is missing,
     # whereas assertTrue(x in y) only reports "False is not true".
     self.assertIn("Header Doesn't Exist", str(context.exception))
 def testget_paramsFail(self):
     """get_params must raise while the data is not fully enumerated.

     StringData.csv is loaded with "phenotype" as the class column and no
     converter is applied before get_params() is called. The raised
     exception's message must contain
     "Features and Phenotypes must be fully numeric".
     """
     dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     se = StringEnumerator(dataPath, "phenotype")
     with self.assertRaises(Exception) as context:
         se.get_params()
     # assertIn prints the actual message when the substring is missing.
     self.assertIn("Features and Phenotypes must be fully numeric",
                   str(context.exception))
 def testchange_header_nameInvalid2(self):
     """Renaming a header that does not exist must raise.

     Loads StringData.csv with "phenotype" as the class column and tries to
     rename header "N" to "N5". The raised exception's message must contain
     "Current Header Doesn't Exist".
     """
     dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     se = StringEnumerator(dataPath, "phenotype")
     with self.assertRaises(Exception) as context:
         se.change_header_name("N", "N5")
     # assertIn prints the actual message when the substring is missing.
     self.assertIn("Current Header Doesn't Exist", str(context.exception))
    def testchange_header_nameInvalid(self):
        """Renaming a header to a name that is already taken must raise.

        Loads StringData.csv with "phenotype" as the class column and tries
        to rename header "N1" to "N2". The raised exception's message must
        contain "New Class Name Cannot Be An Already Existing Data Header or
        Phenotype Name".
        """
        dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
        se = StringEnumerator(dataPath, "phenotype")
        with self.assertRaises(Exception) as context:
            se.change_header_name("N1", "N2")

        # assertIn prints the actual message when the substring is missing.
        self.assertIn(
            "New Class Name Cannot Be An Already Existing Data Header or Phenotype Name",
            str(context.exception))
 def testdelete_attribute(self):
     """Deleting an attribute must drop its header and its converter map.

     Renames "N1" to "gender", registers random converters for "gender" and
     "N3", then deletes "gender". Afterwards the headers must be
     ["N2", "N3"], the class label must still be "phenotype", and the map
     must contain only the "N3" entry. The feature arrays themselves are
     not inspected by this test.
     """
     dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     se = StringEnumerator(dataPath, "phenotype")
     se.change_header_name("N1", "gender")
     se.add_attribute_converter_random("gender")
     se.add_attribute_converter_random("N3")
     se.delete_attribute("gender")
     cHeaders = np.array(["N2", "N3"])
     cMap = {"N3": {"young": "0", "old": "1"}}
     self.assertTrue(np.array_equal(cHeaders, se.dataHeaders))
     self.assertTrue(np.array_equal("phenotype", se.classLabel))
     # assertEqual shows a dict diff on mismatch instead of a bare "False".
     self.assertEqual(se.map, cMap)
 def testInitHeaders(self):
     """The constructor must expose the feature headers of the CSV.

     Loads MissingFeatureData.csv with "phenotype" as the class column and
     expects ``dataHeaders`` to equal ["N1", "N2", "N3", "N4"].
     """
     fixture = "test/DataSets/Tests/MissingFeatureData.csv"
     enumerator = StringEnumerator(os.path.join(THIS_DIR, fixture),
                                   "phenotype")
     expected_headers = np.array(["N1", "N2", "N3", "N4"])
     headers_match = np.array_equal(expected_headers, enumerator.dataHeaders)
     self.assertTrue(headers_match)
 def testInitFeaturesAndClassRemoval(self):
     """The constructor must yield matching feature and phenotype arrays.

     Loads MissingFeatureAndPhenotypeData.csv with "phenotype" as the class
     column and expects exactly three feature rows and three phenotype
     values, all held as strings, with missing features shown as "NA".
     Presumably rows whose phenotype is missing are dropped on load; the
     fixture itself is not visible from this test.
     """
     fixture = "test/DataSets/Tests/MissingFeatureAndPhenotypeData.csv"
     enumerator = StringEnumerator(os.path.join(THIS_DIR, fixture),
                                   "phenotype")
     expected_features = np.array([
         ["1.0", "NA", "1.0", "4.0"],
         ["NA", "1.0", "NA", "1.0"],
         ["6.0", "NA", "1.0", "1.0"],
     ])
     expected_classes = np.array(["1.0", "0.0", "1.0"])
     features_match = np.array_equal(expected_features,
                                     enumerator.dataFeatures)
     self.assertTrue(features_match)
     classes_match = np.array_equal(expected_classes,
                                    enumerator.dataPhenotypes)
     self.assertTrue(classes_match)
 def testInitMissingData(self):
     """The constructor must represent missing feature values as "NA".

     Loads MissingFeatureData.csv with "phenotype" as the class column and
     expects ``dataFeatures`` to be a 5x4 array of strings in which every
     missing cell reads "NA".
     """
     fixture = "test/DataSets/Tests/MissingFeatureData.csv"
     enumerator = StringEnumerator(os.path.join(THIS_DIR, fixture),
                                   "phenotype")
     expected_rows = [
         ["1.0", "NA", "1.0", "4.0"],
         ["2.0", "0.0", "1.0", "NA"],
         ["4.0", "NA", "1.0", "2.0"],
         ["NA", "1.0", "NA", "1.0"],
         ["6.0", "NA", "1.0", "1.0"],
     ]
     expected_features = np.array(expected_rows)
     features_match = np.array_equal(expected_features,
                                     enumerator.dataFeatures)
     self.assertTrue(features_match)
    def testChangeClassAndHeaderNames2(self):
        """Header renames must carry their converter maps along.

        Loads StringData.csv with "phenotype" as the class column, adds a
        random class converter, and interleaves header renames
        ("N1" -> "gender" -> "Gender", "N3" -> "Age") with random attribute
        converters. The class label itself is never renamed here. The test
        then checks the headers, the class label, and that the map is keyed
        by the final header names.
        """
        dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
        se = StringEnumerator(dataPath, "phenotype")
        se.add_class_converter_random()
        se.change_header_name("N1", "gender")
        se.add_attribute_converter_random("gender")
        se.change_header_name("gender", "Gender")
        # The next three calls are expected to leave the map untouched:
        # "Gender" already has a converter and "gender" no longer names a
        # header. The expected map below has a single "Gender" entry and no
        # "gender" entry.
        se.add_attribute_converter_random("Gender")
        se.add_attribute_converter_random("Gender")
        se.add_attribute_converter_random("gender")
        se.add_attribute_converter_random("N3")
        # Renaming after the converter was added: the map key must follow.
        se.change_header_name("N3", "Age")

        cHeaders = np.array(["Gender", "N2", "Age"])
        cMap = {
            "phenotype": {
                "china": "0",
                "japan": "1",
                "russia": "2"
            },
            "Gender": {
                "male": "0",
                "female": "1"
            },
            "Age": {
                "young": "0",
                "old": "1"
            }
        }
        self.assertTrue(np.array_equal(cHeaders, se.dataHeaders))
        self.assertTrue(np.array_equal("phenotype", se.classLabel))
        # assertEqual shows a dict diff on mismatch instead of a bare "False".
        self.assertEqual(se.map, cMap)
 def testChangeClassAndHeaderNames(self):
     """Renaming the class and headers must update the stored labels.

     Loads StringData.csv with "phenotype" as the class column, renames the
     class to "country", then applies a chain of header renames. The chain
     reuses names that earlier renames freed up ("N1", and "phenotype" once
     the class is called "country"). The final headers must be
     ["gender", "floats", "age"] and the class label "country".
     """
     csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     enumerator = StringEnumerator(csv_path, "phenotype")
     enumerator.change_class_name("country")
     # Applied strictly in this order; later steps rely on earlier ones.
     rename_steps = (
         ("N1", "gender"),
         ("N2", "N1"),
         ("N1", "floats"),
         ("N3", "phenotype"),
         ("phenotype", "age"),
     )
     for current_name, new_name in rename_steps:
         enumerator.change_header_name(current_name, new_name)
     expected_headers = np.array(["gender", "floats", "age"])
     headers_match = np.array_equal(expected_headers, enumerator.dataHeaders)
     self.assertTrue(headers_match)
     label_matches = np.array_equal("country", enumerator.classLabel)
     self.assertTrue(label_matches)
 def testget_params2(self):
     """get_params must return numeric arrays once everything is converted.

     Loads StringData.csv with "phenotype" as the class column, renames all
     three headers and the class, registers explicit converters for
     "gender" and "age" plus a random class converter, and converts all
     attributes. The four values returned by get_params() are then compared
     with the expected headers, class label, features and phenotypes.
     Missing features are expected to come back as NaN.
     """
     csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     enumerator = StringEnumerator(csv_path, "phenotype")
     for current_name, new_name in (("N1", "gender"), ("N2", "floats"),
                                    ("N3", "age")):
         enumerator.change_header_name(current_name, new_name)
     enumerator.change_class_name("country")
     gender_values = np.array(["female", "male", "NA", "other"])
     enumerator.add_attribute_converter("gender", gender_values)
     age_values = np.array(["old", "young"])
     enumerator.add_attribute_converter("age", age_values)
     enumerator.add_class_converter_random()
     enumerator.convert_all_attributes()
     headers, class_label, features, phenotypes = enumerator.get_params()
     expected_headers = np.array(["gender", "floats", "age"])
     expected_features = np.array([
         [1, 1.2, 1],
         [0, 0.3, np.nan],
         [0, -0.4, 0],
         [np.nan, 0, 1],
     ])
     expected_phenotypes = np.array([0, 1, 0, 2])
     self.assertEqual("country", class_label)
     headers_match = np.array_equal(expected_headers, headers)
     self.assertTrue(headers_match)
     # equal_nan=True so NaN placeholders compare equal to each other.
     features_close = np.allclose(expected_features, features,
                                  equal_nan=True)
     self.assertTrue(features_close)
     phenotypes_close = np.allclose(expected_phenotypes, phenotypes,
                                    equal_nan=True)
     self.assertTrue(phenotypes_close)
    def testNumericCheck(self):
        """check_is_full_numeric must turn true only after full conversion.

        For StringData.csv the check is expected to be false on load, still
        false after only "N1" is converted, and true once "N3" and the class
        have converters and are converted as well. For
        MissingFeatureData.csv the check is expected to be true immediately
        after loading; presumably missing values do not count against it.
        """
        string_csv = os.path.join(THIS_DIR,
                                  "test/DataSets/Tests/StringData.csv")
        string_enum = StringEnumerator(string_csv, "phenotype")
        numeric_on_load = string_enum.check_is_full_numeric()
        self.assertFalse(numeric_on_load)

        string_enum.add_attribute_converter_random("N1")
        string_enum.convert_all_attributes()
        numeric_after_n1 = string_enum.check_is_full_numeric()
        self.assertFalse(numeric_after_n1)

        string_enum.add_attribute_converter_random("N3")
        string_enum.add_class_converter_random()
        string_enum.convert_all_attributes()
        numeric_after_all = string_enum.check_is_full_numeric()
        self.assertTrue(numeric_after_all)

        missing_csv = os.path.join(
            THIS_DIR, "test/DataSets/Tests/MissingFeatureData.csv")
        missing_enum = StringEnumerator(missing_csv, "phenotype")
        self.assertTrue(missing_enum.check_is_full_numeric())
    def testDeleteInstancesWithMissing2(self):
        """Dropping incomplete instances must leave only complete rows.

        Loads StringData.csv with "phenotype" as the class column, renames
        "N1" to "gender", and removes every instance lacking data for
        "gender", "N2" or "N3" in turn. Random converters are then added for
        "gender", "N3" and the class, and all attributes are converted. Two
        rows are expected to survive, both with the same phenotype, and the
        converted values are compared as strings.
        """
        dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
        se = StringEnumerator(dataPath, "phenotype")
        se.change_header_name("N1", "gender")
        se.delete_all_instances_without_header_data("gender")
        se.delete_all_instances_without_header_data("N2")
        se.delete_all_instances_without_header_data("N3")

        # Converters are added after the deletions, so the expected map below
        # only lists values that remain in the surviving rows.
        se.add_attribute_converter_random("gender")
        se.add_attribute_converter_random("N3")
        se.add_class_converter_random()
        se.convert_all_attributes()

        cHeaders = np.array(["gender", "N2", "N3"])
        cMap = {
            "phenotype": {
                "china": "0"
            },
            "gender": {
                "male": "0",
                "female": "1"
            },
            "N3": {
                "young": "0",
                "old": "1"
            }
        }
        cArray = np.array([["0", "1.2", "0"], ["1", "-0.4", "1"]])
        cPArray = np.array(["0", "0"])
        self.assertTrue(np.array_equal(cHeaders, se.dataHeaders))
        self.assertTrue(np.array_equal("phenotype", se.classLabel))
        self.assertTrue(np.array_equal(cArray, se.dataFeatures))
        self.assertTrue(np.array_equal(cPArray, se.dataPhenotypes))
        # assertEqual shows a dict diff on mismatch instead of a bare "False".
        self.assertEqual(se.map, cMap)