Пример #1
0
    def test1(self):
        """Check pruning of a small tree against known results.

        A tree is built with ID3.ID3Boot from five examples and must
        cross-validate against them with zero error.  It is then pruned twice:
        once using the same five examples (the returned tree must compare
        equal to the input tree) and once using those examples plus two extra
        rows (the returned tree must differ and the reported error must match
        0.14286 according to feq).
        """
        oPts = [
            [0, 0, 1, 0],
            [0, 1, 1, 1],
            [1, 0, 1, 1],
            [1, 1, 0, 0],
            [1, 1, 1, 1],
        ]
        tPts = oPts + [[0, 1, 1, 0], [0, 1, 1, 0]]
        tree = ID3.ID3Boot(oPts, attrs=range(3), nPossibleVals=[2] * 4)
        err, badEx = CrossValidate.CrossValidate(tree, oPts)
        assert err == 0.0, 'bad initial error'
        assert len(badEx) == 0, 'bad initial error'

        # prune with original data, shouldn't do anything
        f = StringIO()
        PruneTree._verbose = True
        try:
            # capture stdout so the verbose output can be checked below
            with redirect_stdout(f):
                newTree, err = PruneTree.PruneTree(tree, [], oPts)
        finally:
            # _verbose is module-level state: restore it even when pruning
            # raises, so a failure here cannot leave it on for other tests
            PruneTree._verbose = False
        self.assertIn('Pruner', f.getvalue())
        assert newTree == tree, 'improper pruning'

        # prune with train data
        newTree, err = PruneTree.PruneTree(tree, [], tPts)
        assert newTree != tree, 'bad pruning'
        assert feq(err, 0.14286), 'bad error result'
Пример #2
0
def _testChain():
    """Manual demo: build a tree from 13 examples, prune it, print the errors.

    The same list object is used both as the training set and as the holdout
    set.  The tree is printed before and after pruning, together with the
    cross-validation error at each stage and the examples reported as bad
    for the pruned tree.
    """
    from rdkit.ML.DecTree import ID3

    # seven separate copies of the same row, followed by six mixed rows
    trainPts = [[1, 0, 0, 0, 1] for _ in range(7)]
    trainPts += [
        [0, 0, 1, 1, 0],
        [0, 0, 1, 1, 0],
        [0, 0, 1, 1, 1],
        [0, 1, 0, 1, 0],
        [0, 1, 0, 1, 0],
        [0, 1, 0, 0, 1],
    ]
    # deliberately an alias, not a copy
    holdoutPts = trainPts

    nCols = len(trainPts[0])
    tree = ID3.ID3Boot(trainPts, attrs=range(nCols - 1), nPossibleVals=[2] * nCols)
    tree.Print()

    trainErr, _ = CrossValidate.CrossValidate(tree, trainPts)
    print('original error:', trainErr)

    holdoutErr, _ = CrossValidate.CrossValidate(tree, holdoutPts)
    print('original holdout error:', holdoutErr)

    pruned, _ = PruneTree(tree, trainPts, holdoutPts)
    pruned.Print()

    prunedErr, misses = CrossValidate.CrossValidate(pruned, holdoutPts)
    print('pruned holdout error is:', prunedErr)
    print(misses)
Пример #3
0
def _testSpecific():
    """Manual demo: prune a five-example tree against a seven-example holdout.

    Prints the tree and its cross-validation error on the training and
    holdout sets, prunes it, prints the pruned tree with its holdout error
    and bad examples, and finally prints the len() of both trees.
    """
    from rdkit.ML.DecTree import ID3

    trainPts = [
        [0, 0, 1, 0],
        [0, 1, 1, 1],
        [1, 0, 1, 1],
        [1, 1, 0, 0],
        [1, 1, 1, 1],
    ]
    # holdout set = training rows plus two additional identical rows
    holdoutPts = trainPts + [[0, 1, 1, 0] for _ in range(2)]

    tree = ID3.ID3Boot(trainPts, attrs=range(3), nPossibleVals=[2] * 4)
    tree.Print()

    trainErr, _ = CrossValidate.CrossValidate(tree, trainPts)
    print('original error:', trainErr)

    holdoutErr, _ = CrossValidate.CrossValidate(tree, holdoutPts)
    print('original holdout error:', holdoutErr)

    pruned, _ = PruneTree(tree, trainPts, holdoutPts)
    pruned.Print()

    prunedErr, misses = CrossValidate.CrossValidate(pruned, holdoutPts)
    print('pruned holdout error is:', prunedErr)
    print(misses)

    print(len(tree), len(pruned))
Пример #4
0
    def _setupMultiTree(self):
        """Build a tree from eight four-column examples and keep it on self.

        Stores the tree as ``self.t1`` and the raw example rows as
        ``self.examples``.
        """
        rows = [
            [0, 1, 0, 0],
            [0, 0, 0, 1],
            [0, 0, 1, 2],
            [0, 1, 1, 2],
            [1, 0, 0, 2],
            [1, 0, 1, 2],
            [1, 1, 0, 2],
            [1, 1, 1, 0],
        ]

        dataSet = MLData.MLQuantDataSet(rows)
        # one attribute index per variable reported by the data set
        attrIds = range(dataSet.GetNVars())
        self.t1 = ID3.ID3Boot(dataSet.GetAllData(), attrIds, dataSet.GetNPossibleVals())
        self.examples = rows
Пример #5
0
def TestTree():
    """Build and print a depth-limited tree from named examples.

    Each example row starts with a string label; the attributes handed to
    ID3.ID3Boot are the column indices after that label and before the last
    column.  The tree is built with ``maxDepth=1`` and printed.
    """
    namedRows = [
        ['p1', 0, 1, 0, 0],
        ['p2', 0, 0, 0, 1],
        ['p3', 0, 0, 1, 2],
        ['p4', 0, 1, 1, 2],
        ['p5', 1, 0, 0, 2],
        ['p6', 1, 0, 1, 2],
        ['p7', 1, 1, 0, 2],
        ['p8', 1, 1, 1, 0],
    ]
    nCols = len(namedRows[0])
    # skip column 0 (the label) and the final column
    attrIds = list(range(1, nCols - 1))
    possibleVals = [0, 2, 2, 2, 3]

    namedTree = ID3.ID3Boot(namedRows, attrIds, possibleVals, maxDepth=1)
    namedTree.Print()
Пример #6
0
    def _setupPyMultiTree(self):
        """Build the eight-example tree using the Python entropy functions.

        Before the tree is built, ``InfoEntropy`` and ``InfoGain`` on
        ``ID3.entropy`` are replaced by ``PyInfoEntropy`` and ``PyInfoGain``
        from ``rdkit.ML.InfoTheory.entropy``.  The replacement is not undone
        here.  The tree is stored as ``self.t1`` and the example rows as
        ``self.examples``.
        """
        from rdkit.ML.InfoTheory import entropy

        # patch first so that the tree construction below picks these up
        ID3.entropy.InfoEntropy = entropy.PyInfoEntropy
        ID3.entropy.InfoGain = entropy.PyInfoGain

        rows = [
            [0, 1, 0, 0],
            [0, 0, 0, 1],
            [0, 0, 1, 2],
            [0, 1, 1, 2],
            [1, 0, 0, 2],
            [1, 0, 1, 2],
            [1, 1, 0, 2],
            [1, 1, 1, 0],
        ]

        dataSet = MLData.MLQuantDataSet(rows)
        attrIds = range(dataSet.GetNVars())
        self.t1 = ID3.ID3Boot(dataSet.GetAllData(), attrIds, dataSet.GetNPossibleVals())
        self.examples = rows
Пример #7
0
  def test1(self):
    """Test pruning with known results.

    The tree built from five examples must cross-validate against them with
    zero error, must come back equal when pruned with those same examples,
    and must change (with an error matching 0.14286 per feq) when pruned
    with the examples plus two extra rows.
    """
    trainPts = [
        [0, 0, 1, 0],
        [0, 1, 1, 1],
        [1, 0, 1, 1],
        [1, 1, 0, 0],
        [1, 1, 1, 1],
    ]
    extendedPts = trainPts + [[0, 1, 1, 0] for _ in range(2)]

    tree = ID3.ID3Boot(trainPts, attrs=range(3), nPossibleVals=[2] * 4)
    initialErr, misses = CrossValidate.CrossValidate(tree, trainPts)
    assert initialErr == 0.0, 'bad initial error'
    assert len(misses) == 0, 'bad initial error'

    # pruning against the data the tree was built from must be a no-op
    samePruned, _ = PruneTree.PruneTree(tree, [], trainPts)
    assert samePruned == tree, 'improper pruning'

    # pruning against the extended data must alter the tree
    pruned, prunedErr = PruneTree.PruneTree(tree, [], extendedPts)
    assert pruned != tree, 'bad pruning'
    assert feq(prunedErr, 0.14286), 'bad error result'
Пример #8
0
def GenRandomExamples(nVars=10, randScale=0.3, bitProb=0.5, nExamples=500, seed=(0, 0),
                      addResults=1):
  """Generate reproducible random boolean examples.

  Each example holds ``nVars`` boolean values.  When ``addResults`` is true a
  final boolean is appended: whether the sum of the weights of the variables
  that are set reaches 1.0.

  Arguments:
    nVars: number of variables per example
    randScale: factor applied to each variable's random weight
    bitProb: a variable is set when ``random.random()`` exceeds this value
    nExamples: number of examples to generate
    seed: sequence whose first element seeds the ``random`` module; any
      further elements are ignored
    addResults: if true, append the result value to every example

  Returns:
    a 3-tuple ``(examples, attrs, nPossibleVals)`` where ``attrs`` is
    ``[0, ..., nVars - 1]`` and ``nPossibleVals`` holds a 2 for each variable
    plus one more for the result column.

  Note: this reseeds the global ``random`` module state.
  """
  random.seed(seed[0])
  varWeights = numpy.array([random.random() for _ in range(nVars)]) * randScale

  examples = []
  for _ in range(nExamples):
    varVals = [random.random() > bitProb for _ in range(nVars)]
    if addResults:
      # builtin sum keeps the summation order (and rounding) of the result
      res = sum(numpy.array(varVals) * varWeights)
      varVals.append(res >= 1.)
    examples.append(varVals)

  # one entry per column (variables + result); the count must follow nVars,
  # not nExamples, or the list is too short whenever nExamples < nVars
  nPossibleVals = [2] * (nVars + 1)
  attrs = list(range(nVars))

  return (examples, attrs, nPossibleVals)


if __name__ == '__main__':  # pragma: nocover
  # Script entry point: generate the default random example set, pickle the
  # examples, attribute list and possible-value counts to 'random.dat.pkl',
  # then build a tree from them and pickle it to 'save.pkl'.
  from rdkit.six.moves import cPickle
  examples, attrs, nPossibleVals = GenRandomExamples()
  # the context manager closes (and flushes) the file even if a dump fails
  with open('random.dat.pkl', 'wb+') as outF:
    cPickle.dump(examples, outF)
    cPickle.dump(attrs, outF)
    cPickle.dump(nPossibleVals, outF)

  tree = ID3.ID3Boot(examples, attrs, nPossibleVals)
  tree.Pickle('save.pkl')