def test1(self):
    # testing pruning with known results
    #
    # (Kept as a comment rather than a docstring, as in the original.)
    #
    # Builds an ID3 tree from five hand-written examples, checks that it
    # reproduces them without error, then prunes it twice: once against the
    # same examples (the tree must come back equal) and once against an
    # extended set (the tree must change and the reported error must be
    # 0.14286).
    # NOTE(review): with attrs=range(3) the fourth column is presumably the
    # result/label column -- confirm against ID3Boot.
    oPts = [
        [0, 0, 1, 0],
        [0, 1, 1, 1],
        [1, 0, 1, 1],
        [1, 1, 0, 0],
        [1, 1, 1, 1],
    ]
    # Same points plus two extra copies of [0, 1, 1, 0].
    tPts = oPts + [[0, 1, 1, 0], [0, 1, 1, 0]]
    tree = ID3.ID3Boot(oPts, attrs=range(3), nPossibleVals=[2] * 4)
    err, badEx = CrossValidate.CrossValidate(tree, oPts)
    self.assertEqual(err, 0.0, 'bad initial error')
    self.assertEqual(len(badEx), 0, 'bad initial error')

    # prune with original data, shouldn't do anything
    f = StringIO()
    PruneTree._verbose = True
    try:
        # Capture the verbose output so it can be inspected below.
        with redirect_stdout(f):
            newTree, err = PruneTree.PruneTree(tree, [], oPts)
    finally:
        # Always switch the module-level flag back off, even if pruning
        # raises, so a failure here cannot make later tests verbose.
        PruneTree._verbose = False
    self.assertIn('Pruner', f.getvalue())
    self.assertEqual(newTree, tree, 'improper pruning')

    # prune with train data
    newTree, err = PruneTree.PruneTree(tree, [], tPts)
    self.assertNotEqual(newTree, tree, 'bad pruning')
    self.assertTrue(feq(err, 0.14286), 'bad error result')
def _testChain():
    """Manual check: build an ID3 tree on a small data set, prune it, report errors.

    The same list of points is used both to grow the tree and as the
    hold-out set. The function calls ``Print()`` on the original and the
    pruned tree and prints the cross-validation errors along with the
    misclassified examples of the pruned tree. Nothing is returned.
    """
    from rdkit.ML.DecTree import ID3

    # 13 rows of 5 binary values; each row is a distinct list object.
    trainPts = (
        [[1, 0, 0, 0, 1] for _ in range(7)]
        + [[0, 0, 1, 1, 0] for _ in range(2)]
        + [[0, 0, 1, 1, 1]]
        + [[0, 1, 0, 1, 0] for _ in range(2)]
        + [[0, 1, 0, 0, 1]]
    )
    holdoutPts = trainPts  # deliberately the very same list

    nCols = len(trainPts[0])
    tree = ID3.ID3Boot(trainPts, attrs=range(nCols - 1), nPossibleVals=[2] * nCols)
    tree.Print()

    for label, pts in (('original error:', trainPts),
                       ('original holdout error:', holdoutPts)):
        err, badEx = CrossValidate.CrossValidate(tree, pts)
        print(label, err)

    # The second element of the pruning result is not needed here.
    prunedTree, _ = PruneTree(tree, trainPts, holdoutPts)
    prunedTree.Print()

    err, badEx = CrossValidate.CrossValidate(prunedTree, holdoutPts)
    print('pruned holdout error is:', err)
    print(badEx)
def _testSpecific():
    """Manual check: prune a tree grown on five points against an extended set.

    The hold-out set is the training set plus two extra copies of
    ``[0, 1, 1, 0]``. The function calls ``Print()`` on the original and
    the pruned tree, prints the cross-validation errors, the misclassified
    examples of the pruned tree, and finally ``len()`` of both trees.
    Nothing is returned.
    """
    from rdkit.ML.DecTree import ID3

    trainPts = [
        [0, 0, 1, 0],
        [0, 1, 1, 1],
        [1, 0, 1, 1],
        [1, 1, 0, 0],
        [1, 1, 1, 1],
    ]
    extraPts = [[0, 1, 1, 0], [0, 1, 1, 0]]
    holdoutPts = trainPts + extraPts

    tree = ID3.ID3Boot(trainPts, attrs=range(3), nPossibleVals=[2] * 4)
    tree.Print()

    for label, pts in (('original error:', trainPts),
                       ('original holdout error:', holdoutPts)):
        err, badEx = CrossValidate.CrossValidate(tree, pts)
        print(label, err)

    # The second element of the pruning result is not needed here.
    prunedTree, _ = PruneTree(tree, trainPts, holdoutPts)
    prunedTree.Print()

    err, badEx = CrossValidate.CrossValidate(prunedTree, holdoutPts)
    print('pruned holdout error is:', err)
    print(badEx)
    print(len(tree), len(prunedTree))
def _setupMultiTree(self):
    """Fixture helper: grow an ID3 tree on eight rows and store it on ``self``.

    Sets ``self.t1`` to the tree and ``self.examples`` to the raw rows.
    The last value of each row takes the values 0, 1 or 2.
    NOTE(review): that last column is presumably the result column --
    confirm against MLQuantDataSet.
    """
    rows = [
        [0, 1, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [1, 0, 0, 2],
        [1, 0, 1, 2],
        [1, 1, 0, 2],
        [1, 1, 1, 0],
    ]
    dataSet = MLData.MLQuantDataSet(rows)
    # Every variable reported by the data set is offered as a split attribute.
    attrIndices = range(dataSet.GetNVars())
    self.t1 = ID3.ID3Boot(dataSet.GetAllData(), attrIndices, dataSet.GetNPossibleVals())
    self.examples = rows
def TestTree():
    """Testing code for named trees.

    Grows an ID3 tree (``maxDepth=1``) on eight examples whose first entry
    is a name (``'p1'`` .. ``'p8'``) and calls ``Print()`` on the result.
    Nothing is returned.
    """
    values = [
        [0, 1, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [1, 0, 0, 2],
        [1, 0, 1, 2],
        [1, 1, 0, 2],
        [1, 1, 1, 0],
    ]
    # Prefix each row with its name: 'p1', 'p2', ...
    namedExamples = [['p%d' % idx] + row for idx, row in enumerate(values, 1)]

    # Skip column 0 (the name) and the final column when choosing attributes.
    lastCol = len(namedExamples[0]) - 1
    attrIndices = list(range(1, lastCol))
    valCounts = [0, 2, 2, 2, 3]

    namedTree = ID3.ID3Boot(namedExamples, attrIndices, valCounts, maxDepth=1)
    namedTree.Print()
def _setupPyMultiTree(self):
    """Fixture helper: like a plain tree setup, but with ``Py*`` entropy functions.

    Before growing the tree, ``ID3.entropy.InfoEntropy`` and
    ``ID3.entropy.InfoGain`` are rebound to ``PyInfoEntropy`` and
    ``PyInfoGain`` from ``rdkit.ML.InfoTheory.entropy``. The rebinding is
    not undone here. Sets ``self.t1`` to the tree and ``self.examples`` to
    the raw rows.
    """
    from rdkit.ML.InfoTheory import entropy as pyEntropy

    # Rebind the functions ID3 looks up on its entropy module.
    ID3.entropy.InfoEntropy = pyEntropy.PyInfoEntropy
    ID3.entropy.InfoGain = pyEntropy.PyInfoGain

    rows = [
        [0, 1, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [1, 0, 0, 2],
        [1, 0, 1, 2],
        [1, 1, 0, 2],
        [1, 1, 1, 0],
    ]
    dataSet = MLData.MLQuantDataSet(rows)
    # Every variable reported by the data set is offered as a split attribute.
    attrIndices = range(dataSet.GetNVars())
    self.t1 = ID3.ID3Boot(dataSet.GetAllData(), attrIndices, dataSet.GetNPossibleVals())
    self.examples = rows
def test1(self):
    """testing pruning with known results

    Grows an ID3 tree on five points, checks it reproduces them with zero
    error, then prunes it against the same points (the tree must come back
    equal) and against an extended set (the tree must change and the
    reported error must be 0.14286).
    """
    basePts = [
        [0, 0, 1, 0],
        [0, 1, 1, 1],
        [1, 0, 1, 1],
        [1, 1, 0, 0],
        [1, 1, 1, 1],
    ]
    # Same points plus two extra copies of [0, 1, 1, 0].
    extendedPts = basePts + [[0, 1, 1, 0], [0, 1, 1, 0]]

    tree = ID3.ID3Boot(basePts, attrs=range(3), nPossibleVals=[2] * 4)
    err, misclassified = CrossValidate.CrossValidate(tree, basePts)
    assert err == 0.0, 'bad initial error'
    assert len(misclassified) == 0, 'bad initial error'

    # prune with original data, shouldn't do anything
    prunedTree, err = PruneTree.PruneTree(tree, [], basePts)
    assert prunedTree == tree, 'improper pruning'

    # prune with train data
    prunedTree, err = PruneTree.PruneTree(tree, [], extendedPts)
    assert prunedTree != tree, 'bad pruning'
    assert feq(err, 0.14286), 'bad error result'
def GenRandomExamples(nVars=10, randScale=0.3, bitProb=0.5, nExamples=500, seed=(0, 0),
                      addResults=1):
    """Generate a reproducible set of random boolean examples.

    Each example is a list of ``nVars`` booleans. When ``addResults`` is
    truthy a result is appended: whether the sum of the random weights of
    the variables that are set reaches 1.

    Arguments:
      - nVars: number of boolean variables per example
      - randScale: factor applied to the random per-variable weights
      - bitProb: a variable is set when its random draw exceeds this value
      - nExamples: number of examples to generate
      - seed: sequence whose first element seeds the ``random`` module
      - addResults: if truthy, append the result value to every example

    Returns:
      a 3-tuple ``(examples, attrs, nPossibleVals)`` where ``attrs`` is
      ``[0 .. nVars-1]`` and ``nPossibleVals`` holds one entry (always 2)
      per variable plus one for the result.
    """
    random.seed(seed[0])
    varWeights = numpy.array([random.random() for _ in range(nVars)]) * randScale

    examples = []
    for _ in range(nExamples):
        varVals = [random.random() > bitProb for _ in range(nVars)]
        if addResults:
            # Weighted sum over the variables that are set.
            res = sum(numpy.array(varVals) * varWeights)
            varVals.append(res >= 1.)
        examples.append(varVals)

    # One entry per column (variables + result). This used to be sized by
    # nExamples, which left the list too short to index by attribute
    # whenever there were fewer examples than variables.
    nPossibleVals = [2] * (nVars + 1)
    attrs = list(range(nVars))

    return (examples, attrs, nPossibleVals)


if __name__ == '__main__':  # pragma: nocover
    from rdkit.six.moves import cPickle
    examples, attrs, nPossibleVals = GenRandomExamples()
    # Use a context manager so the pickle file is flushed and closed.
    with open('random.dat.pkl', 'wb+') as outF:
        cPickle.dump(examples, outF)
        cPickle.dump(attrs, outF)
        cPickle.dump(nPossibleVals, outF)
    tree = ID3.ID3Boot(examples, attrs, nPossibleVals)
    tree.Pickle('save.pkl')