Пример #1
0
 def test_part_discrete_data(self):
     """Check ``partition_data`` and ``max_GR`` on the combined 'example' data.

     Attributes 1 and 3 are each expected to give H_x ~= 1.5849 and
     H_y_x ~= 0.61219 (to 3 decimal places).  ``max_GR`` is then expected
     to return the first attribute of the candidate list it is given,
     together with the examples grouped by that attribute's value.
     """
     train_data, test_data = load_project_data('example')
     examples = list(train_data) + list(test_data)
     data = ExampleSet(examples)

     n = Node()

     # Both attributes are expected to produce identical entropy figures.
     for attr in (1, 3):
         H_x, H_y_x, part_data = n.partition_data(data, attr)
         self.assertAlmostEqual(0.61219, H_y_x, 3)
         self.assertAlmostEqual(1.5849, H_x, 3)

     # The expected winner is whichever attribute is listed first
     # (presumably because the two tie on gain ratio -- confirm in max_GR).
     for candidates, winner in (([1, 3], 1), ([3, 1], 3)):
         attr_index, part_data = n.max_GR(examples, candidates)
         self.assertEqual(attr_index, winner)

         # Rebuild the expected partition by grouping on the winning attribute.
         part_data_test = {}
         for ex in examples:
             part_data_test.setdefault(ex[winner], []).append(ex)
         self.assertEqual(part_data_test, part_data)
Пример #2
0
 def test_is_partable_data(self):
     """Check ``check_ex_set`` / ``max_GR`` on the full data and on each colour subset.

     The full example set is split on attribute 1 into 'red', 'green' and
     'blue' subsets.  The 'red' and 'blue' subsets are expected to be
     further splittable on attribute 3, the 'green' subset is not.
     """
     train_data, test_data = load_project_data('example')
     examples = list(train_data) + list(test_data)

     n = Node()

     # Assert on element [1] of the result, the same flag every later
     # check in this test inspects.  Asserting on the whole return value
     # would always pass if it is a non-empty tuple.
     # NOTE(review): the call is made twice exactly as before; the second
     # may have been intended to use a different attribute order -- confirm.
     self.assertTrue(n.check_ex_set(examples, [1, 3])[1])
     self.assertTrue(n.check_ex_set(examples, [1, 3])[1])

     index, part_data = n.max_GR(examples, [1, 3])
     self.assertEqual(index, 1)

     # (attribute-1 value, whether the subset should still split on attribute 3)
     expectations = (('red', True), ('green', False), ('blue', True))
     for value, partable in expectations:
         test_part_data = [ex for ex in examples if ex[1] == value]
         subset = part_data[value]
         self.assertEqual(set(test_part_data), set(subset))

         flag = n.check_ex_set(subset, [3])[1]
         if partable:
             self.assertTrue(flag)
             index, _ = n.max_GR(subset, [3])
             self.assertEqual(index, 3)
         else:
             self.assertFalse(flag)

     # With the candidate order reversed, attribute 3 is expected to win.
     index, _ = n.max_GR(examples, [3, 1])
     self.assertEqual(index, 3)
Пример #3
0
 def test_node_shape(self):
     """Check ``Node.shape()`` as the tree is grown one level at a time.

     The expected tuples are (3, 1), (5, 2) and (7, 3) -- apparently
     (total node count, depth), though that reading is inferred from the
     values only.
     """
     root = Node()

     # Root with two leaf children.
     root.children = {1: Node(), 2: Node()}
     self.assertEqual((3, 1), root.shape())

     # Give child 1 two children of its own.
     root.children[1].children = {3: Node(), 4: Node()}
     self.assertEqual((5, 2), root.shape())

     # And one level deeper under grandchild 3.
     root.children[1].children[3].children = {5: Node(), 6: Node()}
     self.assertEqual((7, 3), root.shape())
def nfold(meta, data, target, num=10):
    """Run ``num``-fold cross-validation of ``Node`` over ``data``.

    The row index of ``data`` is shuffled in place with ``np.random`` and
    cut into ``num`` consecutive folds of ``len(data) // num`` rows.  For
    each fold a ``Node`` is built on the remaining rows, ``run()`` is
    called on it, and element ``[1]`` of ``validate()`` on the held-out
    fold is recorded.  Rows left over when ``len(data)`` is not a multiple
    of ``num`` are never held out; they appear in every training set.
    Finally a ``Node`` is built and run on the complete ``data``.

    Args:
        meta: Passed through unchanged as the first ``Node`` argument.
        data: Object supporting ``len()``, ``.index`` and ``.loc[...]``
            (presumably a pandas DataFrame -- confirm against callers).
        target: Passed through unchanged as the third ``Node`` argument.
        num: Number of folds.

    Returns:
        Tuple ``(avg, wrong, node, test, tries, nodes)``: the mean of the
        per-fold validation values, the ``run()`` result of the node built
        on all data, that node, the per-fold ``run()`` results, the
        per-fold validation values, and the per-fold nodes.

    Raises:
        ZeroDivisionError: If ``num`` is 0 (or negative, in which case no
            fold is evaluated and the average cannot be computed).
    """
    total = len(data)
    validate = total // num  # rows per validation fold

    ix = np.array(data.index)
    np.random.shuffle(ix)

    test = []
    tries = []
    nodes = []
    for i in range(num):
        # Single pre-formatted argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('val with %s %s' % (validate, total))
        start = i * validate
        stop = (i + 1) * validate
        # Train on everything outside [start, stop), validate on the slice.
        training = concat([data.loc[ix[:start]], data.loc[ix[stop:]]])
        validating = data.loc[ix[start:stop]]
        node = Node(meta, training, target)
        tests = node.run()
        vals = node.validate(validating)[1]
        print('%s %s' % (tests, vals))
        test.append(tests)
        tries.append(vals)
        nodes.append(node)
    # Float divisor so integer scores are not truncated by Python 2's
    # integer division.
    avg = sum(tries) / float(len(tries))
    print(avg)
    node = Node(meta, data, target)
    wrong = node.run()
    return avg, wrong, node, test, tries, nodes
Пример #5
0
    def test_part_discrete_data(self):
        """Check ``partition_data`` and ``max_GR`` on the combined 'example' data.

        Attributes 1 and 3 are each expected to give H_x ~= 1.5849 and
        H_y_x ~= 0.61219 (to 3 decimal places).  ``max_GR`` is then expected
        to return the first attribute of the candidate list it is given,
        together with the examples grouped by that attribute's value.
        """
        train_data, test_data = load_project_data('example')
        examples = list(train_data) + list(test_data)
        data = ExampleSet(examples)

        n = Node()

        # Both attributes are expected to produce identical entropy figures.
        for attr in (1, 3):
            H_x, H_y_x, part_data = n.partition_data(data, attr)
            self.assertAlmostEqual(0.61219, H_y_x, 3)
            self.assertAlmostEqual(1.5849, H_x, 3)

        # The expected winner is whichever attribute is listed first
        # (presumably because the two tie on gain ratio -- confirm in max_GR).
        for candidates, winner in (([1, 3], 1), ([3, 1], 3)):
            attr_index, part_data = n.max_GR(examples, candidates)
            self.assertEqual(attr_index, winner)

            # Rebuild the expected partition by grouping on the winning attribute.
            part_data_test = {}
            for ex in examples:
                part_data_test.setdefault(ex[winner], []).append(ex)
            self.assertEqual(part_data_test, part_data)
Пример #6
0
    def test_discrete_predict(self):
        """Train on attributes 1 and 3 and check every example is predicted correctly.

        The node is trained twice, once per attribute ordering, and after
        each training ``predict`` must return the last element of every
        example in the data set.
        """
        train_data, test_data = load_project_data('example')
        data = ExampleSet(list(train_data) + list(test_data))

        n = Node()

        # only train on the discrete data, in both attribute orders
        for attrs in ([1, 3], [3, 1]):
            n.train(data, attrs)
            # Evaluate every prediction before asserting, as a list.
            hits = [ex[-1] == n.predict(ex) for ex in data]
            self.assertTrue(all(hits))
Пример #7
0
    def test_is_partable_data(self):
        """Check ``check_ex_set`` / ``max_GR`` on the full data and on each colour subset.

        The full example set is split on attribute 1 into 'red', 'green' and
        'blue' subsets.  The 'red' and 'blue' subsets are expected to be
        further splittable on attribute 3, the 'green' subset is not.
        """
        train_data, test_data = load_project_data('example')
        examples = list(train_data) + list(test_data)

        n = Node()

        # Assert on element [1] of the result, the same flag every later
        # check in this test inspects.  Asserting on the whole return value
        # would always pass if it is a non-empty tuple.
        # NOTE(review): the call is made twice exactly as before; the second
        # may have been intended to use a different attribute order -- confirm.
        self.assertTrue(n.check_ex_set(examples, [1, 3])[1])
        self.assertTrue(n.check_ex_set(examples, [1, 3])[1])

        index, part_data = n.max_GR(examples, [1, 3])
        self.assertEqual(index, 1)

        # (attribute-1 value, whether the subset should still split on attribute 3)
        expectations = (('red', True), ('green', False), ('blue', True))
        for value, partable in expectations:
            test_part_data = [ex for ex in examples if ex[1] == value]
            subset = part_data[value]
            self.assertEqual(set(test_part_data), set(subset))

            flag = n.check_ex_set(subset, [3])[1]
            if partable:
                self.assertTrue(flag)
                index, _ = n.max_GR(subset, [3])
                self.assertEqual(index, 3)
            else:
                self.assertFalse(flag)

        # With the candidate order reversed, attribute 3 is expected to win.
        index, _ = n.max_GR(examples, [3, 1])
        self.assertEqual(index, 3)
Пример #8
0
 def test_discrete_predict(self):
     """Train on attributes 1 and 3 and check every example is predicted correctly.

     The node is trained twice, once per attribute ordering, and after
     each training ``predict`` must return the last element of every
     example in the data set.
     """
     train_data, test_data = load_project_data('example')
     data = ExampleSet(list(train_data) + list(test_data))

     n = Node()

     # only train on the discrete data, in both attribute orders
     for attrs in ([1, 3], [3, 1]):
         n.train(data, attrs)
         # Evaluate every prediction before asserting, as a list.
         hits = [ex[-1] == n.predict(ex) for ex in data]
         self.assertTrue(all(hits))
Пример #9
0
    def test_node_shape(self):
        """Check ``Node.shape()`` as the tree is grown one level at a time.

        The expected tuples are (3, 1), (5, 2) and (7, 3) -- apparently
        (total node count, depth), though that reading is inferred from the
        values only.
        """
        root = Node()

        # Root with two leaf children.
        root.children = {1: Node(), 2: Node()}
        self.assertEqual((3, 1), root.shape())

        # Give child 1 two children of its own.
        root.children[1].children = {3: Node(), 4: Node()}
        self.assertEqual((5, 2), root.shape())

        # And one level deeper under grandchild 3.
        root.children[1].children[3].children = {5: Node(), 6: Node()}
        self.assertEqual((7, 3), root.shape())
Пример #10
0
def pytest_funcarg__voting():
    """Return the result of ``Node.from_arff`` for ``./vote.arff``.

    The ``pytest_funcarg__`` prefix is pytest's legacy funcarg-factory
    naming, so this presumably supplies a ``voting`` test argument
    (TODO confirm the pinned pytest version still honours it).  The path
    is relative to the current working directory.
    """
    arff_path = './vote.arff'
    return Node.from_arff(arff_path)
Пример #11
0
def pytest_funcarg__lenses():
    """Return the result of ``Node.from_arff`` for ``./lenses.arff``.

    The ``pytest_funcarg__`` prefix is pytest's legacy funcarg-factory
    naming, so this presumably supplies a ``lenses`` test argument
    (TODO confirm the pinned pytest version still honours it).  The path
    is relative to the current working directory.
    """
    arff_path = './lenses.arff'
    return Node.from_arff(arff_path)