def test_compute_entropy_of_split_weighted(self):
    """Check that a weighted, single-branch split has entropy 1.0.

    The split holds ``base_count`` instances from a generator built with
    1.0, followed by four times as many from a generator built with 0.0
    whose weight callback always returns 0.25.

    NOTE(review): presumably the first generator argument is the
    probability of a positive label, so both groups would carry the same
    total weight -- confirm against build_instance_generator.
    """
    base_count = 10
    make_true = build_instance_generator(1.0)
    make_false = build_instance_generator(0.0, fxnGenWeight=lambda: 0.25)
    instances = make_true(base_count)
    instances = instances + make_false(4 * base_count)
    # Everything goes under one key, so the split has a single branch.
    split = {0: instances}
    self.assertAlmostEqual(1.0, dtree.compute_entropy_of_split(split))
def test_compute_entropy_of_split_weighted(self):
    """Verify compute_entropy_of_split yields 1.0 for a weighted mix.

    Ten instances come from build_instance_generator(1.0) and forty from
    build_instance_generator(0.0, ...) with a constant weight callback
    of 0.25; all of them are placed in a single split branch keyed by 0.

    NOTE(review): the meaning of the 1.0 / 0.0 arguments is assumed to
    be the positive-label probability -- verify in the helper.
    """
    group_size = 10
    positive_source = build_instance_generator(1.0)
    negative_source = build_instance_generator(
        0.0, fxnGenWeight=lambda: 0.25)
    positives = positive_source(group_size)
    negatives = negative_source(4 * group_size)
    entropy = dtree.compute_entropy_of_split({0: positives + negatives})
    self.assertAlmostEqual(1.0, entropy)
def test_compute_entropy_of_split(self):
    """Check split entropy against two generated data sets.

    For every attribute index of each data set, the instances are
    separated by that attribute and the entropy of the resulting split
    must lie within 0.01 of the expected value: 1.0 for instances from
    build_entropy_one_instances and 0.0 for instances from
    build_instance_generator(0.0, cAttrs=3).
    """
    attr_count = random.randint(2, 20)
    value_count = random.randint(1, 30)

    def make_entropy_one(_):
        # The requested instance count is ignored; the helper only needs
        # the attribute and value counts.
        return build_entropy_one_instances(attr_count, value_count)

    # Mirror the attributes the loop below reads from a generator.
    make_entropy_one.cAttrs = attr_count
    make_entropy_one.cValues = value_count
    make_entropy_zero = build_instance_generator(0.0, cAttrs=3)
    tolerance = 0.01
    cases = ((make_entropy_one, 1.0), (make_entropy_zero, 0.0))
    for make_instances, expected in cases:
        instances = make_instances(self.cInsts)
        for attr_index in xrange(make_instances.cAttrs):
            by_value = dtree.separate_by_attribute(instances, attr_index)
            entropy = dtree.compute_entropy_of_split(by_value)
            failure_text = "%.3f not within %.3f of expected %.3f" % (
                entropy, tolerance, expected)
            self.assertTrue(abs(entropy - expected) < tolerance,
                            failure_text)
def test_compute_entropy_of_split(self):
    """Verify compute_entropy_of_split on two generated instance sets.

    One set comes from build_entropy_one_instances with a random number
    of attributes (2-20) and values (1-30) and is expected to give an
    entropy of 1.0; the other comes from
    build_instance_generator(0.0, cAttrs=3) and is expected to give 0.0.
    Each attribute index is checked in turn with a tolerance of 0.01.
    """
    num_attrs = random.randint(2, 20)
    num_values = random.randint(1, 30)

    def entropy_one_source(_count):
        # The argument is accepted only to match the generator call
        # shape used in the loop; it is not used.
        return build_entropy_one_instances(num_attrs, num_values)

    entropy_one_source.cAttrs = num_attrs
    entropy_one_source.cValues = num_values
    entropy_zero_source = build_instance_generator(0.0, cAttrs=3)
    max_error = 0.01
    sources = (entropy_one_source, entropy_zero_source)
    targets = (1.0, 0.0)
    for position, source in enumerate(sources):
        target = targets[position]
        generated = source(self.cInsts)
        for ix in xrange(source.cAttrs):
            partition = dtree.separate_by_attribute(generated, ix)
            observed = dtree.compute_entropy_of_split(partition)
            is_close = abs(observed - target) < max_error
            self.assertTrue(
                is_close,
                "%.3f not within %.3f of expected %.3f"
                % (observed, max_error, target))