def eval(ds, testNum, p, splitProportion=0.2):
    """Average classifier scores over random splits, with vs. without
    univariate feature selection.

    Parameters
    ----------
    ds : dataset supporting splitWithProportion() (pybrain-style)
    testNum : int -- number of random train/test splits to average over
    p : percentile handed to SelectPercentile (scored by module-level `chooser`)
    splitProportion : fraction of the data used as the test split

    Returns a 7-tuple:
        (mean F1 all features,        mean F1 selected features,
         mean recall all features,    mean recall selected features,
         mean precision all features, mean precision selected features,
         classifier name)
    """
    allFeaturesF1 = []
    allFeaturesRecall = []
    allFeaturesPrecision = []

    featureSelectedF1 = []
    featureSelectedRecall = []
    featureSelectedPrecision = []

    # Guard: without this, `name` is unbound at the return statement when
    # testNum == 0 or every label vector is all-zero.
    name = ''
    for _ in range(testNum):
        tstdata, trndata = ds.splitWithProportion(splitProportion)
        X, Y = labanUtil.fromDStoXY(trndata)
        X_test, Y_test = labanUtil.fromDStoXY(tstdata)
        for y, y_test in zip(Y, Y_test):
            # A quality with no positive training sample cannot be learned.
            if all(v == 0 for v in y):
                continue
            clf = AdaBoostClassifier()
            # Classifier class name, e.g. 'AdaBoostClassifier'.
            name = str(clf).split()[0].split('(')[0]

            # --- with univariate feature selection ---
            selector = SelectPercentile(chooser, percentile=p)
            selector.fit(X, y)
            clf.fit(selector.transform(X), y)
            pred = clf.predict(selector.transform(X_test))
            featureSelectedF1.append(metrics.f1_score(y_test, pred))
            featureSelectedRecall.append(metrics.recall_score(y_test, pred))
            featureSelectedPrecision.append(metrics.precision_score(y_test, pred))

            # --- with the full feature set (fresh fit of the same clf) ---
            clf.fit(X, y)
            pred = clf.predict(X_test)
            allFeaturesF1.append(metrics.f1_score(y_test, pred))
            allFeaturesRecall.append(metrics.recall_score(y_test, pred))
            allFeaturesPrecision.append(metrics.precision_score(y_test, pred))

    return np.mean(allFeaturesF1), np.mean(featureSelectedF1), \
        np.mean(allFeaturesRecall), np.mean(featureSelectedRecall), \
        np.mean(allFeaturesPrecision), np.mean(featureSelectedPrecision), \
        name
# ----- Example #2 -----
 def eval(ds):
     f1s=[]
     pred = n.activateOnDataset(ds)
     X, Y = labanUtil.fromDStoXY(ds)
     for i,y in enumerate(Y):
         f1s.append(metrics.f1_score(np.round(y), np.round(pred[:,i])))
     return np.mean(f1s)
def createDiagram(source, quality):
    """Build and save an information-gain diagram for one Laban quality.

    *source* names the CMA dataset to load; *quality* selects which label
    vector is plotted against the feature values. The output file name is
    the concatenation of source and quality.
    """
    dataset, featuresNames = labanUtil.getPybrainDataSet(source)
    X, Y = labanUtil.fromDStoXY(dataset)
    qualities, _ = cp.getCombinations()
    labels = Y[qualities.index(quality)]
    ig.createDiagram(X, labels, featuresNames, source + quality)
def eval(ds, clf, splitProportion=0.2, p=4):
    """Score *clf* with SelectPercentile feature selection on one random
    split of *ds*.

    Parameters
    ----------
    ds : dataset supporting splitWithProportion()
    clf : any fit/predict estimator
    splitProportion : fraction of data held out as the test split
    p : percentile for SelectPercentile (scored by module-level `chooser`)

    Returns (f1s, ps, rs): per-quality F1, precision and recall lists;
    qualities whose training labels are all zero are skipped.
    """
    tstdata, trndata = ds.splitWithProportion(splitProportion)
    X, Y = labanUtil.fromDStoXY(trndata)
    X_test, Y_test = labanUtil.fromDStoXY(tstdata)
    f1s = []
    ps = []
    rs = []
    # (Dropped an unused enumerate index and an unused `name` local.)
    for y, y_test in zip(Y, Y_test):
        # A quality with no positive training sample cannot be learned.
        if all(v == 0 for v in y):
            continue
        selector = SelectPercentile(chooser, percentile=p)
        selector.fit(X, y)
        clf.fit(selector.transform(X), y)
        pred = clf.predict(selector.transform(X_test))
        f1s.append(metrics.f1_score(y_test, pred))
        ps.append(metrics.precision_score(y_test, pred))
        rs.append(metrics.recall_score(y_test, pred))
    return f1s, ps, rs
def eval(ds, clf, splitProportion=0.2, p=4):
    """Score *clf* inside a SelectKBest->classifier pipeline on one random
    split of *ds*.

    NOTE(review): `selectedFeaturesNum` is read from module scope -- confirm
    it is defined before this is called. The `p` parameter is accepted for
    interface compatibility with the sibling eval() but is unused here.

    Returns (f1s, ps, rs): per-quality F1, precision and recall lists.
    """
    tstdata, trndata = ds.splitWithProportion(splitProportion)
    X, Y = labanUtil.fromDStoXY(trndata)
    X_test, Y_test = labanUtil.fromDStoXY(tstdata)
    f1s = []
    ps = []
    rs = []
    # (Dropped an unused enumerate index, an unused `name` local, and dead
    # commented-out code.)
    for y, y_test in zip(Y, Y_test):
        anova_filter = SelectKBest(f_classif, k=selectedFeaturesNum)
        pipe = Pipeline([
            ('feature_selection', anova_filter),
            ('classification', clf),
        ])
        pipe.fit(X, y)
        pred = pipe.predict(X_test)
        f1s.append(metrics.f1_score(y_test, pred))
        ps.append(metrics.precision_score(y_test, pred))
        rs.append(metrics.recall_score(y_test, pred))
    return f1s, ps, rs
# ----- Example #6 -----
import LabanUtils.util as labanUtil
import LabanUtils.informationGain as ig
import mocapUtils.utils as utils
import matplotlib.pyplot as plt
import LabanUtils.combinationsParser as cp

# Script: rank features of one CMA (movement analyst) dataset by recursive
# information gain, for the first Laban quality only.
CMAs = ['Rachelle', 'Karen']
trainSource = CMAs[0]
testSource = CMAs[1]
tstdata, featuresNames = labanUtil.getPybrainDataSet(testSource)  
print 'Data was read'
X2, Y2 = labanUtil.fromDStoXY(tstdata)
# Only the first quality's label vector is ranked here.
y=Y2[0]
igs, ps = ig.recursiveRanking(X2, y)
print igs
print max(igs)
"""
trndata, featuresNames = labanUtil.getPybrainDataSet(trainSource) 
X1, Y1 = labanUtil.fromDStoXY(trndata)
cors = []
for y1, y2 in zip(Y1, Y2):
    im1 = ig.infoGain(X1, y1)
    print im1
    ind = [i for i, e in enumerate(im1) if e != 0]
    print ind
    im2 = ig.infoGain(X2, y2)
    print im2
    ind = [i for i, e in enumerate(im2) if e != 0]
    print ind
    cor = mocapUtils.corr(im1, im2)
    print cor
# ----- Example #7 -----
import numpy as np
import pylab as pl
from sklearn import datasets, svm
from sklearn.feature_selection import SelectPercentile, f_classif
import LabanUtils.util as labanUtil
import LabanUtils.combinationsParser as cp
from multiprocessing import Pool

# Script: univariate F-test feature scoring on the Laban dataset, compared
# against linear-SVM weights (adapted from the scikit-learn demo).
# NOTE(review): getPybrainDataSet is called without a source argument here,
# unlike elsewhere in this file -- confirm the signature.
ds = labanUtil.getPybrainDataSet()
X, Y = labanUtil.fromDStoXY(ds)
X, Y = np.array(X), np.array(Y)

# Score against the first quality's label vector. (The original fit the
# selector on Y[0] but then called clf.fit(X, y) with `y` undefined; both
# now use this single binding. A dangling `selector.` line -- a syntax
# error -- was also removed.)
y = Y[0]

X_indices = np.arange(X.shape[-1])

###############################################################################
# Univariate feature selection with F-test for feature scoring
# We use the default selection function: the 10% most significant features
selector = SelectPercentile(f_classif, percentile=10)
selector.fit(X, y)
scores = -np.log10(selector.pvalues_)
#scores /= scores.max()
pl.bar(X_indices - .45, scores, width=.2,
       label=r'Univariate score ($-Log(p_{value})$)', color='g')
###############################################################################
# Compare to the weights of an SVM
clf = svm.SVC(kernel='linear')
clf.fit(X, y)
# ----- Example #8 -----
# Script fragment: cross-CMA experiment setup -- train on one analyst's
# recordings, test on the other's, writing per-quality results to CSVs.
# NOTE(review): this reads as an extracted snippet; several names
# (labanUtil, np, f_classif, AdaBoostClassifier, ...) are imported
# elsewhere in the file or mid-fragment below.
chooser=f_classif#ig.recursiveRanking#ig.infoGain##
    #splitProportion = 0.2
import mlpy
CMAs = ['Rachelle', 'Karen']
trainSource = CMAs[0]
testSource = CMAs[1]
withPCA=False
fs=False
#clf = AdaBoostClassifier()
#clf = svm.SVC(C=c, class_weight={1: ratio}, kernel='rbf')

tstdata, featuresNames = labanUtil.getPybrainDataSet(testSource)  
trndata, _ = labanUtil.getPybrainDataSet(trainSource)  
print 'Datasets were read'
X, Y = labanUtil.fromDStoXY(trndata)
X_test, Y_test = labanUtil.fromDStoXY(tstdata)

# Output files: the selected best features, and per-quality scores.
bestFeatures = open('bestFeatures.csv', 'w')
bestFeatures.flush()
bestFeatures.write('Quality, Feature Name, Operator, F-value, p-value\n')

performance = open('performance.csv', 'w')
performance.flush()
performance.write('Quality, Precision, Recall, F1 score\n')
# Accumulators for overall (all-quality) statistics.
totalF1Train=[]
totalF1Test=[]
totalPrecisionTest=[]
totalRecallTest=[]
totalCoesfs = []
# Candidate regularisation strengths for a C-parameter sweep.
cs = np.logspace(-3, 5, 40)
from sklearn.linear_model import SGDClassifier

import numpy as np
from sklearn import svm
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
import LabanUtils.combinationsParser as cp
from sklearn import manifold, datasets, decomposition, ensemble, lda, random_projection

# Single-quality setup: train on Karen, evaluate the 'Advance' quality.
quality = 'Advance'
trainSource = 'Karen'
testSource = 'Rachelle'
trndata, featuresNames = labanUtil.getPybrainDataSet(trainSource)  
#tstdata, featuresNames = labanUtil.getPybrainDataSet(trainSource)  
#X_test, Y_test = labanUtil.fromDStoXY(tstdata)
X, Y = labanUtil.fromDStoXY(trndata)
qualities, combinations = cp.getCombinations()
y=Y[qualities.index(quality)]

"""
c=80
selectedFeaturesNum = 25
ratio ='auto'
clf = svm.LinearSVC(C=c,  loss='LR', penalty='L1', dual=False, class_weight='auto')#{1: ratio})
chooser=f_classif#ig.infoGain#ig.recursiveRanking
anova_filter = SelectKBest(chooser, k=selectedFeaturesNum)
pipe = Pipeline([
                ('feature_selection', anova_filter),
                ('classification', clf)
                ])
pipe.fit(X, y)
# ----- Example #10 -----
def getXYforMultiSet(source):
    """Load the dataset named by *source* and return (X, Y-transposed).

    The transposed label matrix puts one row per sample, as multi-label
    estimators expect.
    """
    dataset, _featureNames = labanUtil.getPybrainDataSet(source)
    X, Y = labanUtil.fromDStoXY(dataset)
    return X, np.transpose(Y)