Пример #1
0
        f, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 8))
        ax1.set_title(headers[col] + ' ' + scaled[0])
        ax1.hist(X[:, col], bins=bincount)
        ax2.hist(X_scaled[:, col], bins=bincount)
        X[:, col] = X_binned
        ax3.hist(X[:, col], bins=bincount)
        ax3.set_title(headers[col] + ' ' + scaled[1])
        f.show()

    if values:
        f, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 8))
        ax1.hist(y, bins=bincount)
        y_Binned = binScaling(y, 3)
        print "Binned the values"
        ax2.hist(preprocessing.scale(y), bins=bincount)
        ax3.hist(y_Binned, bins=bincount)
        f.suptitle('Y Values')
        f.show()

    if timeData:
        for i in range(len(headers)):
            dt = getDateTime('timeData.npy')
            plt.scatter(dt, preprocessing.scale(y), label='Values')
            plt.gcf().autofmt_xdate()
            plt.xlabel('Time')
            plt.grid()
            plt.show()
            i += 1

    plt.show()
Пример #2
0
def _dcg_at_k(ranked, K):
    """Return the discounted cumulative gain of the leading K entries.

    ``ranked`` is a sequence of ``(prediction, relevance, timestamp)`` tuples.
    The entry at 1-based position ``p`` contributes
    ``relevance / log2(p + 1)``.  Accumulation stops after position ``K``; if
    ``K`` never matches a position (``K <= 0`` or ``K > len(ranked)``) every
    entry is counted.
    """
    dcg = 0
    for position, entry in enumerate(ranked, 1):
        dcg += entry[1] / math.log(position + 1, 2)
        if position == K:
            break
    return dcg


def determine_NDGC(X, y, K, cv, clf_min, typev):
    """Estimate mean NDCG@K and mean Kendall tau over ``cv`` random splits.

    For each of the ``cv`` rounds the data is shuffled, split 70/30 into
    train/test, and a regressor is fitted with ``SVR_fit``; this is repeated
    until the R^2 score on the test part is no longer below ``clf_min``.
    The test targets are then binned into 5 relevance levels with
    ``binning_tuple``, the test items are ordered according to ``typev``
    and the DCG of that ordering is divided by the DCG of the ideal
    (relevance-sorted) ordering.

    Parameters:
        X, y    -- samples and targets handed to ``shuffle_in_unison`` and
                   ``train_test_split``.  ``shuffle_in_unison``'s return value
                   is ignored, so it presumably shuffles its arguments in
                   place (i.e. the caller's data is reordered) -- confirm.
        K       -- cut-off position for DCG (see ``_dcg_at_k``).
        cv      -- number of rounds.
        clf_min -- R^2 score at which a fitted model is accepted.
        typev   -- ordering to evaluate: 1 = by prediction (descending),
                   2 = by timestamp (descending), 3 = random shuffle.  Any
                   other value leaves the relevance-sorted order untouched.

    Returns:
        ``(mean NDCG, mean Kendall tau)`` over all rounds.

    Notes:
        * The retry loop has no iteration cap: if ``clf_min`` cannot be
          reached it never terminates.
        * A zero ideal DCG is not special-cased; the division is performed
          as-is.
    """
    # The original body read the undefined names X0/y0 and never used the
    # X/y parameters; bind them to the arguments so the function works on
    # the data it is actually given.
    X0, y0 = X, y
    dt = getDateTime('timeData.npy')

    NDGC = []
    tau = []
    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    print("K= %s" % (K,))
    print("CV= %s" % (cv,))
    for j in np.arange(0, cv):
        # Train Classifier.  Always fit at least once: with the previous
        # "while score < clf_min" form (score starting at 0) a clf_min <= 0
        # skipped training entirely and the code below hit unbound names.
        while True:
            shuffle_in_unison(X0, y0)
            X_train, Xt, y_train, yt = cross_validation.train_test_split(
                X0, y0, test_size=0.3, random_state=0)
            clf = SVR_fit(X_train, y_train)
            #clf = linear_regression_fit(X, y)
            # One batch prediction serves both the score and the ranking.
            predicted = clf.predict(Xt)
            score = r2_score(yt, predicted)
            if not score < clf_min:
                break

        recorded = list(yt)
        tau.append(stats.kendalltau(predicted, recorded)[0])

        # bin the recorded values
        recorded = binning_tuple(recorded, 5)
        # NOTE(review): dt is indexed by position within the test split,
        # while the rows were shuffled and split above -- presumably the
        # timestamps no longer line up with their rows; confirm.
        commList = [(predicted[i], rank, dt[i])
                    for i, rank in enumerate(recorded)]

        # Baseline order: by binned relevance, descending.  sorted()[::-1]
        # is kept on purpose instead of reverse=True, because the two
        # differ in how ties are ordered and ties are common after binning.
        ranked = sorted(commList, key=lambda tup: tup[1])[::-1]

        # Re-order according to the strategy under evaluation
        if typev == 1:
            print("Classifier")
            ranked = sorted(ranked, key=lambda tup: tup[0])[::-1]
        if typev == 2:
            print("TimeStamp")
            ranked = sorted(ranked, key=lambda tup: tup[2])[::-1]
        if typev == 3:
            print("Random")
            random.shuffle(ranked)

        DCG = _dcg_at_k(ranked, K)

        # Ideal ordering: by community relevance, descending
        ideal = sorted(ranked, key=lambda tup: tup[1])[::-1]
        iDCG = _dcg_at_k(ideal, K)

        ndcg = DCG / float(iDCG)
        print("Test %s  -  %s" % (j, ndcg))
        NDGC.append(ndcg)

    return np.mean(NDGC), np.mean(tau)
Пример #3
0
        f, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 8))
        ax1.set_title(headers[col] + " " + scaled[0])
        ax1.hist(X[:, col], bins=bincount)
        ax2.hist(X_scaled[:, col], bins=bincount)
        X[:, col] = X_binned
        ax3.hist(X[:, col], bins=bincount)
        ax3.set_title(headers[col] + " " + scaled[1])
        f.show()

    if values:
        f, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 8))
        ax1.hist(y, bins=bincount)
        y_Binned = binScaling(y, 3)
        print "Binned the values"
        ax2.hist(preprocessing.scale(y), bins=bincount)
        ax3.hist(y_Binned, bins=bincount)
        f.suptitle("Y Values")
        f.show()

    if timeData:
        for i in range(len(headers)):
            dt = getDateTime("timeData.npy")
            plt.scatter(dt, preprocessing.scale(y), label="Values")
            plt.gcf().autofmt_xdate()
            plt.xlabel("Time")
            plt.grid()
            plt.show()
            i += 1

    plt.show()