def test_separate_by_attribute(self):
    """Check that dtree.separate_by_attribute groups instances by value.

    For every attribute index exposed by the generator, the returned mapping
    must hold as many groups as there are distinct values of that attribute
    among the generated instances, and every instance stored under a key
    must carry that key as its value for the attribute.
    """
    fxnGen = build_instance_generator(0.5)
    listInst = fxnGen(self.cInsts)
    # range()/items() rather than xrange()/iteritems(): the iteration is the
    # same on Python 2, and these spellings still exist on Python 3.
    for ixAttr in range(fxnGen.cAttrs):
        dictInst = dtree.separate_by_attribute(listInst, ixAttr)
        # Distinct values actually present for this attribute.
        setValues = set(inst.listAttrs[ixAttr] for inst in listInst)
        self.assertEqual(len(setValues), len(dictInst))
        for cValue, listInstSeparate in dictInst.items():
            for inst in listInstSeparate:
                self.assertEqual(cValue, inst.listAttrs[ixAttr])
def test_separate_by_attribute(self):
    """Verify the grouping produced by dtree.separate_by_attribute.

    Each attribute index is split in turn. The number of groups must equal
    the number of distinct values seen for that attribute, and each group
    may only contain instances whose attribute value equals the group key.
    """
    # NOTE(review): a test with this same name is defined on the preceding
    # line of the file; if both live in one class the later definition
    # shadows the earlier one -- confirm which copy should be kept.
    fxnGen = build_instance_generator(0.5)
    listInst = fxnGen(self.cInsts)
    # range()/items() behave the same as xrange()/iteritems() for this loop
    # on Python 2 and, unlike them, are also available on Python 3.
    for ixAttr in range(fxnGen.cAttrs):
        dictInst = dtree.separate_by_attribute(listInst, ixAttr)
        setValues = set(inst.listAttrs[ixAttr] for inst in listInst)
        self.assertEqual(len(setValues), len(dictInst))
        for cValue, listInstSeparate in dictInst.items():
            for inst in listInstSeparate:
                self.assertEqual(cValue, inst.listAttrs[ixAttr])
def test_compute_entropy_of_split(self):
    """Check dtree.compute_entropy_of_split against two expected values.

    Two instance generators are exercised: one wrapping
    build_entropy_one_instances, whose splits are expected to score 1.0, and
    one from build_instance_generator(0.0, cAttrs=3), whose splits are
    expected to score 0.0. For every attribute index the computed entropy
    must lie within dblDelta of the expected value.
    """
    cAttrs = random.randint(2, 20)
    cValues = random.randint(1, 30)
    # The lambda ignores its argument (the instance count) and carries the
    # same cAttrs/cValues attributes the loop below reads from a generator.
    fxnGenOne = lambda _: build_entropy_one_instances(cAttrs, cValues)
    fxnGenOne.cAttrs = cAttrs
    fxnGenOne.cValues = cValues
    fxnGenZero = build_instance_generator(0.0, cAttrs=3)
    dblDelta = 0.01
    for fxnGen, dblP in zip((fxnGenOne, fxnGenZero), (1.0, 0.0)):
        listInst = fxnGen(self.cInsts)
        # range() instead of xrange(): same loop on Python 2, and it still
        # exists on Python 3.
        for ixAttr in range(fxnGen.cAttrs):
            dictInst = dtree.separate_by_attribute(listInst, ixAttr)
            dblEntropy = dtree.compute_entropy_of_split(dictInst)
            self.assertTrue(
                abs(dblEntropy - dblP) < dblDelta,
                "%.3f not within %.3f of expected %.3f"
                % (dblEntropy, dblDelta, dblP))
def test_compute_entropy_of_split(self):
    """Verify dtree.compute_entropy_of_split for two generated data sets.

    Instances from build_entropy_one_instances are expected to yield an
    entropy of 1.0 for every attribute split, and instances from
    build_instance_generator(0.0, cAttrs=3) an entropy of 0.0. Each result
    must be within dblDelta of its expected value.
    """
    # NOTE(review): a test with this same name is defined on the preceding
    # line of the file; if both live in one class the later definition
    # shadows the earlier one -- confirm which copy should be kept.
    cAttrs = random.randint(2, 20)
    cValues = random.randint(1, 30)
    # Wrap the helper so it can be called like a generator (one ignored
    # argument) and expose the cAttrs attribute the inner loop reads.
    fxnGenOne = lambda _: build_entropy_one_instances(cAttrs, cValues)
    fxnGenOne.cAttrs = cAttrs
    fxnGenOne.cValues = cValues
    fxnGenZero = build_instance_generator(0.0, cAttrs=3)
    dblDelta = 0.01
    for fxnGen, dblP in zip((fxnGenOne, fxnGenZero), (1.0, 0.0)):
        listInst = fxnGen(self.cInsts)
        # range() works on both Python 2 and Python 3; xrange() is
        # Python 2 only.
        for ixAttr in range(fxnGen.cAttrs):
            dictInst = dtree.separate_by_attribute(listInst, ixAttr)
            dblEntropy = dtree.compute_entropy_of_split(dictInst)
            self.assertTrue(
                abs(dblEntropy - dblP) < dblDelta,
                "%.3f not within %.3f of expected %.3f"
                % (dblEntropy, dblDelta, dblP))