Exemplo n.º 1
0
    def test_split(self):
        """Split ``self.data`` on feature 0 at 25 and check both partitions.

        Every point in the first partition must have a feature-0 value
        below the threshold, and every point in the second a value at or
        above it. The fixture is expected to yield 3 and 7 points.
        """
        feature_index, threshold = 0, 25
        below, at_or_above = split_data(self.data, feature_index, threshold)

        # Contents are checked before sizes, so a misplaced point is
        # reported ahead of a wrong count.
        for sample in below:
            self.assertLess(sample.values[feature_index], threshold)
        self.assertEqual(len(below), 3)

        for sample in at_or_above:
            self.assertGreaterEqual(sample.values[feature_index], threshold)
        self.assertEqual(len(at_or_above), 7)
Exemplo n.º 2
0
    def test_split(self):
        """Split ``self.data`` on feature 0 at 25 and check both partitions.

        Every point in the first partition must have a feature-0 value
        below the threshold, and every point in the second a value at or
        above it. The fixture is expected to yield 3 and 7 points.
        """
        feature_index, threshold = 0, 25
        below, at_or_above = split_data(self.data, feature_index, threshold)

        # Contents are checked before sizes, so a misplaced point is
        # reported ahead of a wrong count.
        for sample in below:
            self.assertLess(sample.values[feature_index], threshold)
        self.assertEqual(len(below), 3)

        for sample in at_or_above:
            self.assertGreaterEqual(sample.values[feature_index], threshold)
        self.assertEqual(len(at_or_above), 7)
Exemplo n.º 3
0
 def test_best_split(self):
     """Check the splits chosen by ``find_best_split`` on the fixture.

     The full data set is expected to split on feature 1 at 38000. After
     partitioning on that split, the first partition is expected to offer
     no further split (``None``, ``None``) and the second to split on
     feature 0 at 43.
     """
     root_feature, root_thresh = find_best_split(self.data)
     self.assertEqual(root_feature, 1)
     self.assertEqual(root_thresh, 38000)

     first_part, second_part = split_data(
         self.data, root_feature, root_thresh
     )

     # First partition: no split is expected to be found.
     first_feature, first_thresh = find_best_split(first_part)
     self.assertEqual(first_feature, None)
     self.assertEqual(first_thresh, None)

     # Second partition: a further split is expected.
     second_feature, second_thresh = find_best_split(second_part)
     self.assertEqual(second_feature, 0)
     self.assertEqual(second_thresh, 43)
Exemplo n.º 4
0
 def test_best_split(self):
     """Check the splits chosen by ``find_best_split`` on the fixture.

     The full data set is expected to split on feature 1 at 38000. After
     partitioning on that split, the first partition is expected to offer
     no further split (``None``, ``None``) and the second to split on
     feature 0 at 43.
     """
     root_feature, root_thresh = find_best_split(self.data)
     self.assertEqual(root_feature, 1)
     self.assertEqual(root_thresh, 38000)

     first_part, second_part = split_data(
         self.data, root_feature, root_thresh
     )

     # First partition: no split is expected to be found.
     first_feature, first_thresh = find_best_split(first_part)
     self.assertEqual(first_feature, None)
     self.assertEqual(first_thresh, None)

     # Second partition: a further split is expected.
     second_feature, second_thresh = find_best_split(second_part)
     self.assertEqual(second_feature, 0)
     self.assertEqual(second_thresh, 43)
Exemplo n.º 5
0
def test_split_data():
    """Smoke-run ``rF.entropy`` and ``rF.split_data`` on a 10-row toy set.

    Each row holds two values followed by a 0/1 entry in the last column;
    the last column is separated out as the labels. Nothing is asserted:
    the entropy of the labels and the four values returned by
    ``rF.split_data`` are only printed for manual inspection.
    """
    rows = [
        [2.771244718, 1.784783929, 0],
        [1.728571309, 1.169761413, 0],
        [3.678319846, 2.81281357, 0],
        [3.961043357, 2.61995032, 0],
        [2.999208922, 2.209014212, 0],
        [7.497545867, 3.162953546, 1],
        [9.00220326, 3.339047188, 1],
        [7.444542326, 0.476683375, 1],
        [10.12493903, 3.234550982, 1],
        [6.642287351, 3.319983761, 1],
    ]

    samples = np.asarray(rows)
    # Dropping column 2 leaves the two leading columns; dropping columns
    # 0 and 1 leaves the last column as a (10, 1) array.
    features = np.delete(samples, 2, axis=1)
    labels = np.delete(samples, (0, 1), axis=1)

    # entropy is handed a flat Python list of the label values.
    flat_labels = labels[:, 0].tolist()
    print(rF.entropy(flat_labels))

    # NOTE(review): the trailing 0 and 2 are presumably a feature index and
    # a threshold, and the four results presumably right/left data and
    # labels -- confirm against rF.split_data.
    split_result = rF.split_data(features, labels, 0, 2)
    part_a_data, part_a_labels, part_b_data, part_b_labels = split_result
    print(part_a_data, part_a_labels, part_b_data, part_b_labels)