def xy_xval(start, stop, data, rows, f, z, k, m, check, abcd):
    """Classify rows[start..stop-1] one at a time and record each outcome.

    Every row in ``rows`` is first loaded into a scratch table named
    "__bef".  Then, for each test row:

    1. its position in ``data[z]`` is looked up,
    2. ``xy_proj`` is called for that position and the rows it returns are
       loaded into a per-test table "__aft<i>" (presumably the leaf the test
       row falls into -- confirm against xy_proj),
    3. ``hypbuild`` and ``klassAt`` are called on that table,
    4. ``xy_nb`` predicts a class, and the (want, got) pair is handed to
       ``abcd.keep``.

    Args:
        start, stop: half-open index range into ``rows`` selecting test rows.
        data: table store indexed by table name; ``data[z]`` must contain
            every test row.
        rows: sequence of rows; all are loaded into "__bef".
        f: unused; retained so existing callers keep working.
        z: name of the source table (also the key into ``colname``).
        k, m: forwarded unchanged to ``xy_nb``.
        check: when equal to True, print each per-test table and the
            want/got pair, then terminate the process after the first test
            row (before ``abcd.keep`` is called for it).
        abcd: object exposing ``keep(want, got)``.

    Returns:
        None.  Results are reported only through ``abcd.keep``.
    """
    # Keep the original equality test so only True (or 1) enables debugging.
    debug = (check == True)  # noqa: E712

    test_rows = [rows[i] for i in range(start, stop)]

    before = "__bef"
    makeTable(colname[z], before)
    for row in rows:
        addRow(row, before)

    for position, ts in enumerate(test_rows):
        ind = data[z].index(ts)  # index of the test row in data[z]
        # Name the table after the loop position.  list.index() would return
        # the first *equal* row, so duplicate test rows would otherwise share
        # (and re-fill) a single table.
        after = "__aft" + str(position)
        projected = xy_proj(before, data, ind, z, check)
        makeTable(colname[z], after)
        for row in projected:
            addRow(row, after)
        if debug:
            tableprint(after)  # print each leaf table

        hypotheses = hypbuild(data, after)
        where = klassAt(after)
        total = sum((len(data[h]) for h in hypotheses), 0.0)

        want = ts[where]
        got = xy_nb(ts, data, hypotheses, total, after, k, m, check)
        if debug:
            # Single-argument form: same bytes as the Python 2 statement
            # `print "want:", want, "got:", got`, and valid on Python 3.
            print("%s %s %s %s" % ("want:", want, "got:", got))
            sys.exit()  # exit after a round
        abcd.keep(want, got)
def xy_dt(z, args, pdffile=False):
    """Fit a decision tree on the output of xy_proj and return its branches.

    Args:
        z: table name; passed to ``xy_proj``, used as a key into ``colname``
            and as the prefix of the optional .dot file name.
        args: mapping read for the keys 'distprune', 'l' (min_samples_leaf),
            'c' (split criterion) and 'dtreeprune'; also forwarded to
            ``xy_proj`` and ``tshortener.prune_similar``.
        pdffile: when truthy, write the fitted tree in Graphviz format to
            ``z + "iris.dot"``.

    Returns:
        A ``(zlst, branches)`` tuple: the (possibly distance-pruned) result
        of ``xy_proj`` and the value returned by ``xy_dt0`` for the fitted
        classifier.
    """
    zlst = xy_proj(z, data, args)
    if args['distprune']:
        zlst = tshortener.distance_pruner(zlst)

    clustered_data = regroup(zlst, "data")
    clustered_target = regroup(zlst, "target")

    # Convert lists to float ndarrays.  np.float64 is what the removed
    # np.float_ alias pointed at, so the conversion itself is unchanged.
    clustered_target = np.asarray(np.float64(clustered_target))
    clustered_data = np.asarray(
        [[np.float64(item) for item in row] for row in clustered_data]
    )

    # Build classifier
    clf = tree.DecisionTreeClassifier(min_samples_leaf=args['l'],
                                      criterion=args['c'])
    clf = clf.fit(clustered_data, clustered_target)

    if pdffile:
        # NOTE(review): feature names come from colname[z] here but from
        # colname[zlst[1]] below -- confirm both name the same columns.
        with open(z + "iris.dot", 'w') as dot_file:
            tree.export_graphviz(clf, out_file=dot_file,
                                 feature_names=colname[z])

    if args['dtreeprune']:
        # The return value was overwritten immediately in the original, so
        # this call matters only for whatever it does to clf (presumably
        # in-place pruning -- confirm against tshortener).
        tshortener.prune_similar(clf, args, colname[zlst[1]],
                                 delete_more=True, prune=True)
        banner = "#" * 25
        # Same bytes as `print "\n", "#" * 25, "New Tree", "#" * 25` under
        # Python 2 (no space is emitted after the newline).
        print("\n%s %s %s" % (banner, "New Tree", banner))

    branches = xy_dt0(clf, colname[zlst[1]])
    return zlst, branches
def xy_projed():
    """Load the CSV named on the command line and run xy_proj on it.

    Opens ``../data/<argv[1]>.csv``, passes it to ``readCsv`` under the table
    name ``argv[2]``, then calls ``xy_proj`` on that table with 4 as the
    third argument and False as the last one.  The result of ``xy_proj`` is
    discarded.
    """
    # Close the file once readCsv returns (the original leaked the handle).
    # Assumes readCsv consumes the file eagerly -- confirm.
    with open('../data/' + argv[1] + '.csv', 'r') as csvfile:
        readCsv(csvfile, argv[2])
    # tableprint(argv[2])  # uncomment to print the loaded table
    xy_proj(argv[2], data, 4, argv[2], False)