示例#1
0
def xy_xval(start,stop,data,rows,f,z,k,m,check,abcd):
    rmax = len(rows)
    test = []
    hypotheses = {}
    temp = ""
    acc = 0.0
    for r in range(start,stop):
        d = rows[r]
        test.append(d)
    bef = "__bef"
    makeTable(colname[z],bef)
    for r in range(0,rmax):
        d = rows[r]
        addRow(d,bef)
    for ts in test:
#        print ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
        ind = data[z].index(ts)#index of test row in data[z]
        l = "__aft"+str(test.index(ts))
        dafter = xy_proj(bef,data,ind,z,check)        
        makeTable(colname[z],l)
        for r in range(0,len(dafter)):
            d = dafter[r]
            addRow(d,l)
        if check ==True: tableprint(l) #print each leaf table
        hypotheses = hypbuild(data,l)
        where = klassAt(l)
        total = 0.0
        for h in hypotheses:
            total += len(data[h])
        want = ts[where]
        got = xy_nb(ts,data,hypotheses,total,l,k,m,check)
        #check what we are expecting and getting
        if check == True: print "want:",want,"got:",got 
        if want == got: acc+=1.0
        if check == True: sys.exit() #exit after a round
        abcd.keep(want,got)
示例#2
0
文件: xy_dt.py 项目: nave91/miner
def xy_dt(z, args, pdffile=False):

    zlst = xy_proj(z, data, args)
    if args['distprune']:
        zlst = tshortener.distance_pruner(zlst)

    clustered_data = regroup(zlst, "data")
    clustered_target = regroup(zlst, "target")
    #Convert lists to np.ndarrays
    clustered_target = np.asarray(np.float_(clustered_target))
    cd_temp = []
    for row in clustered_data:
        row_temp = []
        for item in row:
            row_temp.append(np.float_(item))
        cd_temp.append(row_temp)
    clustered_data = np.asarray(cd_temp)
    #Build Classifier
    clf = tree.DecisionTreeClassifier(min_samples_leaf=args['l'],
                                      criterion=args['c'])  #,max_depth=5)
    clf = clf.fit(clustered_data, clustered_target)
    if pdffile:
        from sklearn.externals.six import StringIO
        with open(z + "iris.dot", 'w') as f:
            f = tree.export_graphviz(clf, out_file=f, feature_names=colname[z])

    if args['dtreeprune']:
        branches = tshortener.prune_similar(clf,
                                            args,
                                            colname[zlst[1]],
                                            delete_more=True,
                                            prune=True)
        print "\n", "#" * 25, "New Tree", "#" * 25

    branches = xy_dt0(clf, colname[zlst[1]])
    return zlst, branches
示例#3
0
def xy_projed():
    """Load the CSV named on the command line and project it with ``xy_proj``.

    Opens ``../data/<argv[1]>.csv``, hands the file to ``readCsv`` together
    with the table name ``argv[2]``, then calls ``xy_proj`` for that table.
    """
    # The context manager closes the CSV handle once readCsv returns;
    # the handle was previously never closed explicitly.
    with open('../data/'+argv[1]+'.csv','r') as csvfile:
        readCsv(csvfile,argv[2])
    #tableprint(argv[2])
    k = xy_proj(argv[2],data,4,argv[2],False)