def cross(file='data/housingD.csv',rseed=1): def klass(test): return test.cells[train.klass[0].col] seed(rseed) tbl = discreteTable(file) n=0 abcd=Abcd() nLeaves=Num() nNodes=Num() for tests, train in xval(tbl): tree = tdiv(train) for node in dtnodes(tree): print node.branch nLeaves + len([n for n in dtleaves(tree)]) nNodes + len([n for n in dtnodes(tree)]) for test in tests: want = klass(test) got = classify(test,tree) abcd(want,got) exit() nl() abcd.header() abcd.report() print ":nodes",sorted(nNodes.some.all()) print ":leaves",sorted(nLeaves.some.all())
def cross(file='data/housingD.csv', rseed=1): def klass(test): return test.cells[train.klass[0].col] seed(rseed) tbl = discreteTable(file) n = 0 abcd = Abcd() nLeaves = Num() nNodes = Num() for tests, train in xval(tbl): tree = tdiv(train) for node in dtnodes(tree): print node.branch nLeaves + len([n for n in dtleaves(tree)]) nNodes + len([n for n in dtnodes(tree)]) for test in tests: want = klass(test) got = classify(test, tree) abcd(want, got) exit() nl() abcd.header() abcd.report() print ":nodes", sorted(nNodes.some.all()) print ":leaves", sorted(nLeaves.some.all())
def _Abcd(testleaf, train=None, test=None):
    """Label each leaf in `testleaf` as Defective/Non-Defective.

    A leaf whose .score exceeds 0.5 is labelled "Defective"; labels
    are appended to `test`. Leaves whose .score raises are skipped
    (the commented-out leafscore() fallback is preserved as found).

    BUG FIX: the original signature used mutable default arguments
    (train=[], test=[]), so labels silently accumulated across calls
    that relied on the defaults. Defaults are now None with fresh
    lists created per call; callers passing lists are unaffected.

    NOTE(review): `train` and `abcd` are never used and nothing is
    returned -- this looks like an abandoned stub; confirm intent.
    """
    train = [] if train is None else train
    test = [] if test is None else test
    abcd = Abcd(db='Traing', rx='Testing')
    def isDef(x):
        return "Defective" if x > 0.5 else "Non-Defective"
    for leaf in testleaf:
        try:
            test += [isDef(leaf.score)]
        except Exception:
            # go to middle points:
            # give the median of all rows in this point
            # test += [isDef(leafscore(leaf))]
            continue
def learns(tests, trains, indep=lambda x: x[:-1], dep=lambda x: x[-1],
           rf=None, lg=None, dt=None, nb=None):
    """Fit four sklearn classifiers on one train/test split.

    trainTest() turns the rows into (x1, y1) train and (x2, y2) test
    matrices via `indep`/`dep`. Each learner's per-row predictions are
    folded into its Abcd confusion-matrix accumulator: rf=random
    forest, lg=logistic regression, dt=decision tree, nb=naive Bayes.

    BUG FIX: the defaults were rf=Abcd() (etc.), evaluated once at
    def time, so calls relying on the defaults shared -- and silently
    accumulated into -- the same four Abcd objects. Fresh instances
    are now created per call; callers that pass accumulators
    explicitly (as snl() does) see no change.
    """
    rf = Abcd() if rf is None else rf
    lg = Abcd() if lg is None else lg
    dt = Abcd() if dt is None else dt
    nb = Abcd() if nb is None else nb
    x1, y1, x2, y2 = trainTest(tests, trains, indep, dep)

    def _score(model, abcd):
        # Fit on the training matrix, then tally every test
        # prediction against its actual label.
        model.fit(x1, y1)
        for i, got in enumerate(model.predict(x2)):
            abcd(predicted=got, actual=y2[i])

    # Same learners, same hyper-parameters, same order as before.
    _score(RandomForestClassifier(n_estimators=50), rf)
    _score(linear_model.LogisticRegression(C=1e5), lg)
    _score(GaussianNB(), nb)
    _score(DecisionTreeClassifier(criterion="entropy", random_state=1), dt)
def _Abcd(predicted, actual):
    """Score numeric predictions against actual labels via Abcd.

    Each prediction > 0 maps to "Defective", otherwise
    "Non-Defective". The (actual, predicted-label) pairs are told to
    an Abcd counter, its header is printed, and abcd.ask()'s score
    is returned.
    """
    abcd = Abcd(db='Traing', rx='Testing')
    def label(x):
        if x > 0:
            return "Defective"
        return "Non-Defective"
    labels = [label(p) for p in predicted]
    for act, pre in zip(actual, labels):
        abcd.tell(act, pre)
    abcd.header()
    return abcd.ask()
def _Abcd(testleaf, testdata, train):
    """Label test leaves and score them against the `train` actuals.

    Each leaf is labelled "Defective" when its score exceeds
    The.option.threshold, else "Non-Defective". If reading
    leaf.score raises, the label falls back to leafscore(leaf)
    (the middle-point/median estimate). Labels are zipped with the
    actuals in `train`, told to an Abcd counter, and abcd.ask()'s
    score is returned.

    FIXES: dropped a redundant 'continue' that was the last statement
    of the loop body, dead commented-out code, and the unused binding
    of the second zip element.

    NOTE(review): `testdata` only bounds the zip truncation; its rows
    are otherwise unused -- confirm that is intended.
    """
    test = []
    abcd = Abcd(db='Traing', rx='Testing')
    def isDef(x):
        return "Defective" if x > The.option.threshold else "Non-Defective"
    for leaf, _ in zip(testleaf, testdata):
        try:
            test += [isDef(leaf.score)]
        except Exception:
            # leaf.score unavailable: use the middle-point score
            test += [isDef(leafscore(leaf))]
    for actual, predicted in zip(train, test):
        abcd.tell(actual, predicted)
    abcd.header()
    score = abcd.ask()
    return score
def snl(file='data/poi-1.5D.csv', rseed=1, w=dict(_1=0, _0=1)): def klass(x): return x.cells[train.klass[0].col] def val((x, y)): return y if x == ninf else x seed(rseed) nl() print "#", file tbl = discreteTable(file) tree0 = tdiv(tbl) showTdiv(tree0) nl() old, better, worse = Sym(), Sym(), Sym() abcd1, abcd2 = Abcd(db=file, rx="where"), Abcd(db=file, rx="ranfor") abcd3 = Abcd(db=file, rx="logref") abcd4 = Abcd(db=file, rx="dt") abcd5 = Abcd(db=file, rx="nb") for tests, train in xval(tbl): learns(tests, train._rows, indep=lambda row: map(val, row.cells[:-2]), dep=lambda row: row.cells[-1], rf=abcd2, lg=abcd3, dt=abcd4, nb=abcd5), tree = tdiv(train) snakesAndLadders(tree, train, w) for test in tests: abcd1(actual=klass(test), predicted=classify(test, tree)) a, b = improve(test, tree) old + a better + b _, c = degrade(test, tree) worse + c print "\n:asIs", old.counts print ":plan", better.counts print ":warn", worse.counts abcd1.header() abcd1.report() abcd2.report() abcd3.report() abcd4.report() abcd5.report()
def snl(file='data/poi-1.5D.csv',rseed=1,w=dict(_1=0,_0=1)): def klass(x): return x.cells[train.klass[0].col] def val((x,y)): return y if x == ninf else x seed(rseed) nl(); print "#",file tbl = discreteTable(file) tree0 = tdiv(tbl) showTdiv(tree0); nl() old, better, worse = Sym(), Sym(), Sym() abcd1, abcd2 = Abcd(db=file,rx="where"), Abcd(db=file,rx="ranfor") abcd3 = Abcd(db=file, rx="logref") abcd4 = Abcd(db=file, rx="dt") abcd5 = Abcd(db=file, rx="nb") for tests, train in xval(tbl): learns(tests,train._rows, indep=lambda row: map(val,row.cells[:-2]), dep = lambda row: row.cells[-1], rf = abcd2, lg = abcd3, dt = abcd4, nb = abcd5), tree = tdiv(train) snakesAndLadders(tree,train,w) for test in tests: abcd1(actual = klass(test), predicted = classify(test,tree)) a,b = improve(test,tree); old + a; better + b _,c = degrade(test,tree); worse + c print "\n:asIs",old.counts print ":plan",better.counts print ":warn",worse.counts abcd1.header() abcd1.report() abcd2.report() abcd3.report() abcd4.report() abcd5.report()