示例#1
0
 def test_prepare_outcomes(self):
     """Check ``fs._prepare_outcomes`` for valid and invalid outcome specs.

     A string key must give a one-dimensional, non-categorical ``y``; a
     classify callable must give a one-dimensional, categorical ``y``.
     An unknown key must raise ``KeyError`` and an argument that is
     neither a string nor a classify function must raise ``TypeError``.
     """
     results = test_utilities.load_flu_data()

     # string type correct: register a scalar outcome taken from the last
     # entry along the second axis of the deceased population data
     ooi = 'nr deaths'
     results[1][ooi] = results[1]['deceased population region 1'][:, -1]
     y, categorical = fs._prepare_outcomes(results[1], ooi)

     self.assertFalse(categorical)
     # assertEqual reports both values on failure, unlike assertTrue(a == b)
     self.assertEqual(len(y.shape), 1)

     # string type not correct --> KeyError
     with self.assertRaises(KeyError):
         fs._prepare_outcomes(results[1], "non existing key")

     # classify function correct
     def classify(data):
         # label a case 1 when its last deceased population value
         # exceeds 1,000,000, otherwise 0
         result = data['deceased population region 1']
         classes = np.zeros(result.shape[0])
         classes[result[:, -1] > 1000000] = 1
         return classes

     y, categorical = fs._prepare_outcomes(results[1], classify)

     self.assertTrue(categorical)
     self.assertEqual(len(y.shape), 1)

     # neither string nor classify function --> TypeError
     with self.assertRaises(TypeError):
         fs._prepare_outcomes(results[1], 1)
 def test_prim_init_select(self):
     """Check the ``t_coi`` value computed by ``prim.setup_prim``.

     Both threshold directions (``prim.ABOVE`` and ``prim.BELOW``) are
     exercised with a named outcome and compared against a count computed
     directly from the outcome array. A final call with a classify
     function only has to complete without raising.
     """
     self.results = test_utilities.load_flu_data()
     self.classify = flu_classify

     experiments, outcomes = self.results

     unc = recfunctions.get_names(experiments.dtype)

     # scalar outcome: last entry along the second axis of the
     # deceased population data
     outcomes['death toll'] = outcomes['deceased population region 1'][:, -1]
     results = experiments, outcomes

     # test initialization, including t_coi calculation in case of searching
     # for results equal to or higher than the threshold
     threshold = 10000
     prim_obj = prim.setup_prim(results, classify='death toll',
                                threshold_type=prim.ABOVE,
                                threshold=threshold,
                                incl_unc=unc)

     expected = np.count_nonzero(outcomes['death toll'] >= threshold)
     self.assertEqual(prim_obj.t_coi, expected)

     # test initialization, including t_coi calculation in case of searching
     # for results equal to or lower than the threshold
     threshold = 1000
     prim_obj = prim.setup_prim(results, classify='death toll',
                                threshold_type=prim.BELOW,
                                threshold=threshold)

     expected = np.count_nonzero(outcomes['death toll'] <= threshold)
     self.assertEqual(prim_obj.t_coi, expected)

     # NOTE(review): prim.ABOVE is passed as ``threshold`` here, not as
     # ``threshold_type`` -- kept exactly as it was; confirm this is intended.
     prim.setup_prim(self.results, self.classify, threshold=prim.ABOVE)
示例#3
0
    def test_get_univariate_feature_scores(self):
        """Univariate scores must contain one entry per experiment field.

        Checked for a classify callable with the default ``score_func``
        and with ``'chi2'``, and afterwards for a named scalar outcome.
        """
        flu_results = test_utilities.load_flu_data()
        experiments, outcomes = flu_results
        n_fields = len(experiments.dtype.fields)

        def classify(data):
            # 1.0 where the last deceased population value is higher
            # than 1,000,000, 0.0 everywhere else (one label per case)
            deceased = data['deceased population region 1']
            return (deceased[:, -1] > 1000000).astype(float)

        # classify callable, default score function
        default_scores = fs.get_univariate_feature_scores(flu_results,
                                                          classify)
        self.assertEqual(len(default_scores), n_fields)

        # classify callable, chi2 score function
        chi2_scores = fs.get_univariate_feature_scores(flu_results,
                                                       classify,
                                                       score_func='chi2')
        self.assertEqual(len(chi2_scores), n_fields)

        # named outcome, default score function
        outcome_name = 'nr deaths'
        outcomes[outcome_name] = \
            outcomes['deceased population region 1'][:, -1]
        named_scores = fs.get_univariate_feature_scores(flu_results,
                                                        outcome_name)
        self.assertEqual(len(named_scores), n_fields)
示例#4
0
 def test_prim_init_select(self):
     """Verify ``prim.setup_prim`` initialisation and its ``t_coi`` count.

     ``t_coi`` is compared with the number of outcome values at or above
     the threshold (``prim.ABOVE``) and at or below it (``prim.BELOW``).
     The last call, using a classify function, is only required to run
     without raising.
     """
     self.results = test_utilities.load_flu_data()
     self.classify = flu_classify

     experiments, outcomes = self.results

     unc = recfunctions.get_names(experiments.dtype)

     # derive a scalar outcome from the last entry along the second axis
     outcomes['death toll'] = outcomes['deceased population region 1'][:, -1]
     results = experiments, outcomes

     # searching for results equal to or higher than the threshold
     threshold = 10000
     prim_obj = prim.setup_prim(results, classify='death toll',
                                threshold_type=prim.ABOVE,
                                threshold=threshold,
                                incl_unc=unc)

     # count the matching cases directly instead of masking a dummy array
     n_above = np.count_nonzero(outcomes['death toll'] >= threshold)
     self.assertEqual(prim_obj.t_coi, n_above)

     # searching for results equal to or lower than the threshold
     threshold = 1000
     prim_obj = prim.setup_prim(results, classify='death toll',
                                threshold_type=prim.BELOW,
                                threshold=threshold)

     n_below = np.count_nonzero(outcomes['death toll'] <= threshold)
     self.assertEqual(prim_obj.t_coi, n_below)

     # NOTE(review): ``threshold`` receives prim.ABOVE here (not
     # ``threshold_type``); left unchanged -- confirm the intent.
     prim.setup_prim(self.results, self.classify, threshold=prim.ABOVE)
示例#5
0
 def test_get_rf_feature_scores(self):
     """Check ``fs.get_rf_feature_scores`` for both kinds of outcome spec.

     With a classify callable the returned forest must be a
     ``RandomForestClassifier``; with a named outcome it must be a
     ``RandomForestRegressor``. In both cases the scores must have one
     entry per field of the experiments dtype.
     """
     results = test_utilities.load_flu_data()

     def classify(data):
         # get the output for deceased population
         result = data['deceased population region 1']

         # make an empty array of length equal to number of cases
         classes = np.zeros(result.shape[0])

         # if deceased population is higher than 1.000.000 people,
         # classify as 1
         classes[result[:, -1] > 1000000] = 1

         return classes

     scores, forest = fs.get_rf_feature_scores(results, classify,
                                               random_state=10)

     self.assertEqual(len(scores), len(results[0].dtype.fields))
     # assertIsInstance names the offending type when the check fails
     self.assertIsInstance(forest, RandomForestClassifier)

     # named outcome: last entry along the second axis of the data
     ooi = 'nr deaths'
     results[1][ooi] = results[1]['deceased population region 1'][:, -1]
     scores, forest = fs.get_rf_feature_scores(results, ooi,
                                               random_state=10)

     self.assertEqual(len(scores), len(results[0].dtype.fields))
     self.assertIsInstance(forest, RandomForestRegressor)
 def test_setup_prim_exceptions(self):
     """``prim.setup_prim`` must raise ``PrimException`` in two cases."""
     flu_results = test_utilities.load_flu_data()

     # case 1: outcome referenced by name
     # (presumably rejected because this outcome holds more than one
     # value per case -- confirm against prim.setup_prim)
     with self.assertRaises(prim.PrimException):
         prim.setup_prim(flu_results,
                         'deceased population region 1',
                         threshold=0.8)

     # case 2: classify function returning a two-dimensional slice
     def faulty_classify(outcomes):
         deceased = outcomes['deceased population region 1']
         return deceased[:, 0:10]

     with self.assertRaises(prim.PrimException):
         prim.setup_prim(flu_results, faulty_classify, threshold=0.8)
示例#7
0
 def test_setup_prim_exceptions(self):
     """Expect ``prim.PrimException`` from ``setup_prim`` for bad outcomes."""
     data = test_utilities.load_flu_data()
     expected_error = prim.PrimException

     # a named outcome passed straight through
     # (assumed to fail because it is not one value per case -- verify)
     with self.assertRaises(expected_error):
         prim.setup_prim(data, 'deceased population region 1',
                         threshold=0.8)

     def faulty_classify(outcomes):
         # hands back columns 0..9 instead of a single label per case
         return outcomes['deceased population region 1'][:, 0:10]

     # a classify function with a two-dimensional return value
     with self.assertRaises(expected_error):
         prim.setup_prim(data, faulty_classify, threshold=0.8)
示例#8
0
    def test_find_box(self):
        """Check index bookkeeping across two successive ``find_box`` calls.

        After each call, the number of indices held by the boxes found so
        far plus the number still remaining must equal the length of
        ``prim_obj.y``.
        """
        flu_results = test_utilities.load_flu_data()
        prim_obj = prim.setup_prim(flu_results, flu_classify, threshold=0.8)
        n_total = prim_obj.y.shape[0]

        # first box
        first_box = prim_obj.find_box()
        prim_obj._update_yi_remaining()

        accounted = first_box.yi.shape[0] + prim_obj.yi_remaining.shape[0]
        self.assertEqual(accounted, n_total)

        # second box
        second_box = prim_obj.find_box()
        prim_obj._update_yi_remaining()

        accounted = (first_box.yi.shape[0]
                     + second_box.yi.shape[0]
                     + prim_obj.yi_remaining.shape[0])
        self.assertEqual(accounted, prim_obj.y.shape[0])
示例#9
0
    def test_boxes(self):
        """Check the length of ``prim_obj.boxes`` before and after a search.

        Directly after ``setup_prim`` on a small hand-made data set the
        list must hold one box; on the flu data, after a single
        ``find_box`` call, it must hold two.
        """
        # builtin float replaces the np.float alias, which was deprecated
        # in NumPy 1.20 and removed in 1.24 (np.float was just ``float``)
        x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                     dtype=[('a', float), ('b', float), ('c', float)])
        y = {'y': np.array([0, 1, 2])}
        results = (x, y)

        prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
        boxes = prim_obj.boxes

        self.assertEqual(len(boxes), 1, 'box length not correct')

        # real data test case
        prim_obj = prim.setup_prim(test_utilities.load_flu_data(),
                                   flu_classify,
                                   threshold=0.8)
        prim_obj.find_box()
        boxes = prim_obj.boxes
        self.assertEqual(len(boxes), 2, 'box length not correct')
示例#10
0
 def test_find_box(self):
     """Two ``find_box`` calls must leave every index accounted for.

     The sizes of the ``yi`` arrays of the boxes found so far, together
     with the size of ``yi_remaining``, are compared with the size of
     ``prim_obj.y`` after each call.
     """
     prim_obj = prim.setup_prim(test_utilities.load_flu_data(),
                                flu_classify,
                                threshold=0.8)

     found = []
     for _ in range(2):
         found.append(prim_obj.find_box())
         prim_obj._update_yi_remaining()

         # indices inside the boxes so far, plus those not yet boxed
         in_boxes = sum(box.yi.shape[0] for box in found)
         covered = in_boxes + prim_obj.yi_remaining.shape[0]
         self.assertEqual(covered, prim_obj.y.shape[0])
示例#11
0
 def test_boxes(self):
     """Verify how many boxes ``prim_obj.boxes`` reports.

     One box is expected right after ``setup_prim`` on a three-row
     structured array; two boxes are expected on the flu data once
     ``find_box`` has been called a single time.
     """
     # np.float was only an alias of the builtin float; it was deprecated
     # in NumPy 1.20 and removed in 1.24, so the builtin is used instead
     x = np.array([(0, 1, 2),
                   (2, 5, 6),
                   (3, 2, 1)],
                  dtype=[('a', float),
                         ('b', float),
                         ('c', float)])
     y = {'y': np.array([0, 1, 2])}
     results = (x, y)

     prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
     boxes = prim_obj.boxes

     self.assertEqual(len(boxes), 1, 'box length not correct')

     # real data test case
     prim_obj = prim.setup_prim(test_utilities.load_flu_data(), flu_classify,
                                threshold=0.8)
     prim_obj.find_box()
     boxes = prim_obj.boxes
     self.assertEqual(len(boxes), 2, 'box length not correct')
示例#12
0
    def test_setup_cart(self):
        """Smoke test: ``cart.setup_cart`` must run on the flu data.

        Nothing is asserted about the returned object; the test passes as
        long as the call does not raise.
        """
        results = test_utilities.load_flu_data()

        # return value intentionally not bound: it was never inspected
        cart.setup_cart(results, flu_classify, mass_min=0.05)
示例#13
0
 def test_setup_cart(self):
     """Smoke test for ``cart.setup_cart`` with ``mass_min=0.05``.

     The call only has to complete without raising; its result is not
     examined.
     """
     results = test_utilities.load_flu_data()

     # the result was previously stored in an unused local; dropped here
     cart.setup_cart(results, flu_classify, mass_min=0.05)