def test_part_discrete_data(self):
    """Exercise ``Node.partition_data`` and ``Node.max_GR`` on the 'example' data.

    The train and test examples are pooled into one list. The test then checks:

    * ``partition_data`` on attribute 1 and on attribute 3 yields
      ``H_x`` close to 1.5849 and ``H_y_x`` close to 0.61219 (3 places);
    * ``max_GR`` returns the first listed attribute for both candidate
      orderings ``[1, 3]`` and ``[3, 1]``, together with a mapping from
      attribute value to the examples carrying that value.
    """
    train_data, test_data = load_project_data('example')
    # Train examples first, then test examples, in one flat list.
    examples = [ex for source in (train_data, test_data) for ex in source]
    data = ExampleSet(examples)
    node = Node()

    def group_by(attr):
        # Reference partition: attribute value -> examples, in input order.
        grouped = {}
        for ex in examples:
            key = ex[attr]
            if key not in grouped:
                grouped[key] = []
            grouped[key].append(ex)
        return grouped

    # Both attributes are expected to produce the same pair of values.
    for attr in (1, 3):
        h_x, h_y_x, _ = node.partition_data(data, attr)
        self.assertAlmostEqual(0.61219, h_y_x, places=3)
        self.assertAlmostEqual(1.5849, h_x, places=3)

    # (candidate attributes, attribute max_GR is expected to pick)
    cases = (
        ([1, 3], 1),
        ([3, 1], 3),
    )
    for candidates, expected_attr in cases:
        chosen, partition = node.max_GR(examples, candidates)
        self.assertEqual(chosen, expected_attr)
        self.assertEqual(group_by(expected_attr), partition)
def test_part_discrete_data(self):
    """Verify discrete partitioning of the pooled 'example' data set.

    ``Node.partition_data`` is checked for attributes 1 and 3 (expected
    ``H_x`` ~ 1.5849 and ``H_y_x`` ~ 0.61219 to 3 places), then
    ``Node.max_GR`` is checked for the candidate lists ``[1, 3]`` and
    ``[3, 1]``: it must return the first candidate and a partition equal
    to grouping the examples by that attribute's value.
    """
    train_data, test_data = load_project_data('example')
    pooled = [ex for ex in train_data]
    pooled += [ex for ex in test_data]
    example_set = ExampleSet(pooled)
    node = Node()

    def check_partition(attr):
        # The returned partition itself is not inspected at this stage.
        h_x, h_y_x, _unused = node.partition_data(example_set, attr)
        self.assertAlmostEqual(0.61219, h_y_x, places=3)
        self.assertAlmostEqual(1.5849, h_x, places=3)

    def check_best(candidates, expected_attr):
        best_attr, best_partition = node.max_GR(pooled, candidates)
        self.assertEqual(best_attr, expected_attr)
        # Build the reference grouping by hand: value -> list of examples.
        reference = {}
        for ex in pooled:
            value = ex[expected_attr]
            if value in reference:
                reference[value].append(ex)
            else:
                reference[value] = [ex]
        self.assertEqual(reference, best_partition)

    check_partition(1)
    check_partition(3)

    check_best([1, 3], 1)
    check_best([3, 1], 3)
def test_is_partable_data(self):
    """Check ``Node.check_ex_set`` and ``Node.max_GR`` on the full data and per colour.

    The pooled 'example' data is first checked with candidate attributes
    ``[1, 3]``; ``max_GR`` must then pick attribute 1. Each value of
    attribute 1 ('red', 'green', 'blue') must map to exactly the examples
    carrying that value. For the 'red' and 'blue' subsets element ``[1]``
    of ``check_ex_set(subset, [3])`` must be truthy and ``max_GR`` must
    pick attribute 3; for 'green' that element must be falsy. Finally,
    ``max_GR`` with candidates ``[3, 1]`` must pick attribute 3.
    """
    train_data, test_data = load_project_data('example')
    examples = list(train_data) + list(test_data)
    node = Node()

    # Assert on element [1] of the result, as every other check below does.
    # Asserting on the whole return value would pass for any non-empty
    # sequence and so would verify nothing.
    # NOTE(review): assumes check_ex_set returns a tuple/list whose [1] is
    # the "can be partitioned" flag - confirm against Node.check_ex_set.
    self.assertTrue(node.check_ex_set(examples, [1, 3])[1])
    # Asked a second time: the answer should not change between calls.
    # NOTE(review): confirm the repetition is intentional.
    self.assertTrue(node.check_ex_set(examples, [1, 3])[1])

    index, part_data = node.max_GR(examples, [1, 3])
    self.assertEqual(index, 1)

    # (value of attribute 1, whether the subset should split on attribute 3)
    expectations = (
        ('red', True),
        ('green', False),
        ('blue', True),
    )
    for colour, partable in expectations:
        expected_members = {ex for ex in examples if ex[1] == colour}
        subset = part_data[colour]
        self.assertEqual(expected_members, set(subset))

        flag = node.check_ex_set(subset, [3])[1]
        if partable:
            self.assertTrue(flag)
            index, _ = node.max_GR(subset, [3])
            self.assertEqual(index, 3)
        else:
            self.assertFalse(flag)

    index, _ = node.max_GR(examples, [3, 1])
    self.assertEqual(index, 3)
def test_is_partable_data(self):
    """Verify which example sets ``Node`` reports as splittable.

    Steps, all on the pooled 'example' data:

    1. Element ``[1]`` of ``check_ex_set(examples, [1, 3])`` is truthy.
    2. ``max_GR(examples, [1, 3])`` picks attribute 1 and partitions the
       examples by that attribute's value ('red', 'green', 'blue').
    3. Within each partition, element ``[1]`` of
       ``check_ex_set(subset, [3])`` is truthy for 'red' and 'blue' (and
       ``max_GR`` then picks attribute 3) and falsy for 'green'.
    4. ``max_GR(examples, [3, 1])`` picks attribute 3.
    """
    train_data, test_data = load_project_data('example')
    examples = list(train_data) + list(test_data)
    node = Node()

    def check_colour(partition, colour, expect_partable):
        # The partition for `colour` must hold exactly the matching examples.
        subset = partition[colour]
        matching = [ex for ex in examples if ex[1] == colour]
        self.assertEqual(set(matching), set(subset))

        flag = node.check_ex_set(subset, [3])[1]
        if not expect_partable:
            self.assertFalse(flag)
            return
        self.assertTrue(flag)
        chosen, _ = node.max_GR(subset, [3])
        self.assertEqual(chosen, 3)

    # Index the result with [1] like the per-colour checks do. A bare
    # non-empty sequence is always truthy, so asserting on the whole
    # return value could never fail.
    # NOTE(review): assumes check_ex_set returns a tuple/list whose [1] is
    # the flag of interest - confirm against Node.check_ex_set.
    self.assertTrue(node.check_ex_set(examples, [1, 3])[1])
    # Second identical query: the result should be stable across calls.
    # NOTE(review): confirm the repetition is intentional.
    self.assertTrue(node.check_ex_set(examples, [1, 3])[1])

    index, part_data = node.max_GR(examples, [1, 3])
    self.assertEqual(index, 1)

    check_colour(part_data, 'red', True)
    check_colour(part_data, 'green', False)
    check_colour(part_data, 'blue', True)

    index, _ = node.max_GR(examples, [3, 1])
    self.assertEqual(index, 3)