def eval(ds, testNum, p, splitProportion=0.2):
    """Compare per-quality classification with all features vs. features
    selected by SelectPercentile (module-level `chooser` scoring function),
    averaged over testNum random train/test splits."""
    allFeaturesF1 = []
    allFeaturesRecall = []
    allFeaturesPrecision = []
    featureSelectedF1 = []
    featureSelectedRecall = []
    featureSelectedPrecision = []
    for _ in range(testNum):
        tstdata, trndata = ds.splitWithProportion(splitProportion)
        X, Y = labanUtil.fromDStoXY(trndata)
        X_test, Y_test = labanUtil.fromDStoXY(tstdata)
        for y, y_test in zip(Y, Y_test):
            # Skip qualities with no positive examples in the training split.
            if all(v == 0 for v in y):
                continue
            # Alternative classifiers that were tried:
            #clf = LinearSVC() #fit_intercept=True, C=p
            #clf = RandomForestClassifier() #criterion='entropy'
            #clf = tree.DecisionTreeClassifier() #max_depth=p
            #clf = GradientBoostingClassifier() #learning_rate=lr
            #clf = ExtraTreesClassifier(n_estimators=p)
            clf = AdaBoostClassifier()
            # Alternative selector that was tried:
            #svc = LinearSVC()
            #selector = RFE(estimator=svc, n_features_to_select=p*19, step=0.2)
            selector = SelectPercentile(chooser, percentile=p)
            selector.fit(X, y)
            name = str(clf).split()[0].split('(')[0]
            # Score with the selected features only.
            clf.fit(selector.transform(X), y)
            pred = clf.predict(selector.transform(X_test))
            featureSelectedF1.append(metrics.f1_score(y_test, pred))
            featureSelectedRecall.append(metrics.recall_score(y_test, pred))
            featureSelectedPrecision.append(metrics.precision_score(y_test, pred))
            # Score with all features for comparison.
            clf.fit(X, y)
            pred = clf.predict(X_test)
            allFeaturesF1.append(metrics.f1_score(y_test, pred))
            allFeaturesRecall.append(metrics.recall_score(y_test, pred))
            allFeaturesPrecision.append(metrics.precision_score(y_test, pred))
    return np.mean(allFeaturesF1), np.mean(featureSelectedF1), \
        np.mean(allFeaturesRecall), np.mean(featureSelectedRecall), \
        np.mean(allFeaturesPrecision), np.mean(featureSelectedPrecision), \
        name
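# Usage sketch, not from the original: evaluate on one CMA's recordings,
# averaging 5 random splits and keeping the top 10% of features. The source
# name and numbers are placeholders; the module-level `chooser`
# (e.g. chooser = f_classif) must be defined for eval() above.
def _demoEval():
    ds, featuresNames = labanUtil.getPybrainDataSet('Rachelle')
    (allF1, selF1, allRecall, selRecall,
     allPrecision, selPrecision, name) = eval(ds, testNum=5, p=10)
    print '%s F1 with all features: %.3f, with selected: %.3f' \
        % (name, allF1, selF1)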
def eval(ds):
    """Mean F1 over all qualities for the predictions of `n`, a trained
    pybrain network taken from the enclosing scope."""
    f1s = []
    pred = n.activateOnDataset(ds)
    X, Y = labanUtil.fromDStoXY(ds)
    for i, y in enumerate(Y):
        # Round both targets and activations to get binary labels.
        f1s.append(metrics.f1_score(np.round(y), np.round(pred[:, i])))
    return np.mean(f1s)
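# Usage sketch, assuming the standard pybrain training loop (the hidden-layer
# size and epoch count are hypothetical); eval() above reads the trained
# network from the global `n`.
def _demoNetEval(ds):
    global n
    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.supervised.trainers import BackpropTrainer
    n = buildNetwork(ds.indim, 20, ds.outdim)
    trainer = BackpropTrainer(n, ds)
    trainer.trainEpochs(50)
    print eval(ds)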
def createDiagram(source, quality):
    """Build the information-gain diagram for one Laban quality from one
    CMA's recordings; the output file is named source+quality."""
    ds, featuresNames = labanUtil.getPybrainDataSet(source)
    X, Y = labanUtil.fromDStoXY(ds)
    qualities, combinations = cp.getCombinations()
    y = Y[qualities.index(quality)]
    fileName = source + quality
    ig.createDiagram(X, y, featuresNames, fileName)
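# Example call (hypothetical source/quality pairing): writes the diagram for
# Karen's 'Advance' quality to a file named 'KarenAdvance'.
if __name__ == '__main__':
    createDiagram('Karen', 'Advance')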
def eval(ds, clf, splitProportion=0.2, p=4):
    """Evaluate clf per quality on a single random split, selecting the top
    p percent of features with SelectPercentile (module-level `chooser`)."""
    tstdata, trndata = ds.splitWithProportion(splitProportion)
    X, Y = labanUtil.fromDStoXY(trndata)
    X_test, Y_test = labanUtil.fromDStoXY(tstdata)
    f1s = []
    ps = []
    rs = []
    for y, y_test in zip(Y, Y_test):
        # Skip qualities with no positive examples in the training split.
        if all(v == 0 for v in y):
            continue
        selector = SelectPercentile(chooser, percentile=p)
        selector.fit(X, y)
        name = str(clf).split()[0].split('(')[0]
        clf.fit(selector.transform(X), y)
        pred = clf.predict(selector.transform(X_test))
        f1s.append(metrics.f1_score(y_test, pred))
        ps.append(metrics.precision_score(y_test, pred))
        rs.append(metrics.recall_score(y_test, pred))
    return f1s, ps, rs
def eval(ds, clf, splitProportion=0.2, p=4):
    """Variant of eval() that wires SelectKBest and the classifier into a
    Pipeline. Note: the selection size comes from the module-level
    selectedFeaturesNum; the p argument is unused in this variant."""
    tstdata, trndata = ds.splitWithProportion(splitProportion)
    X, Y = labanUtil.fromDStoXY(trndata)
    X_test, Y_test = labanUtil.fromDStoXY(tstdata)
    f1s = []
    ps = []
    rs = []
    for y, y_test in zip(Y, Y_test):
        anova_filter = SelectKBest(f_classif, k=selectedFeaturesNum)
        pipe = Pipeline([
            ('feature_selection', anova_filter),
            ('classification', clf),
        ])
        pipe.fit(X, y)
        pred = pipe.predict(X_test)
        name = str(clf).split()[0].split('(')[0]
        #clf.fit(selector.transform(X), y)
        #pred = clf.predict(selector.transform(X_test))
        f1s.append(metrics.f1_score(y_test, pred))
        ps.append(metrics.precision_score(y_test, pred))
        rs.append(metrics.recall_score(y_test, pred))
    return f1s, ps, rs
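# Usage sketch, not from the original: run the pipeline variant with a linear
# SVM. eval() reads selectedFeaturesNum from module scope, so it is set here;
# the value 25 mirrors the script further below and is otherwise arbitrary.
def _demoPipelineEval():
    global selectedFeaturesNum
    import numpy as np
    from sklearn import svm
    selectedFeaturesNum = 25
    ds, featuresNames = labanUtil.getPybrainDataSet('Karen')
    clf = svm.LinearSVC()
    f1s, ps, rs = eval(ds, clf)
    print 'mean F1: %.3f, precision: %.3f, recall: %.3f' \
        % (np.mean(f1s), np.mean(ps), np.mean(rs))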
import LabanUtils.util as labanUtil
import LabanUtils.informationGain as ig
import mocapUtils.utils as utils
import matplotlib.pyplot as plt
import LabanUtils.combinationsParser as cp

CMAs = ['Rachelle', 'Karen']
trainSource = CMAs[0]
testSource = CMAs[1]
tstdata, featuresNames = labanUtil.getPybrainDataSet(testSource)
print 'Data was read'
X2, Y2 = labanUtil.fromDStoXY(tstdata)
y = Y2[0]
igs, ps = ig.recursiveRanking(X2, y)
print igs
print max(igs)
# Earlier experiment, kept for reference: correlate the per-feature
# information gain between the two CMAs' datasets.
"""
trndata, featuresNames = labanUtil.getPybrainDataSet(trainSource)
X1, Y1 = labanUtil.fromDStoXY(trndata)
cors = []
for y1, y2 in zip(Y1, Y2):
    im1 = ig.infoGain(X1, y1)
    print im1
    ind = [i for i, e in enumerate(im1) if e != 0]
    print ind
    im2 = ig.infoGain(X2, y2)
    print im2
    ind = [i for i, e in enumerate(im2) if e != 0]
    print ind
    cor = utils.corr(im1, im2)
    print cor
"""
import numpy as np
import pylab as pl
from sklearn import datasets, svm
from sklearn.feature_selection import SelectPercentile, f_classif
import LabanUtils.util as labanUtil
import LabanUtils.combinationsParser as cp
from multiprocessing import Pool

ds = labanUtil.getPybrainDataSet()
X, Y = labanUtil.fromDStoXY(ds)
X, Y = np.array(X), np.array(Y)
X_indices = np.arange(X.shape[-1])

###############################################################################
# Univariate feature selection with F-test for feature scoring.
# We use the default selection function: the 10% most significant features.
selector = SelectPercentile(f_classif, percentile=10)
selector.fit(X, Y[0])
scores = -np.log10(selector.pvalues_)
#scores /= scores.max()
pl.bar(X_indices - .45, scores, width=.2,
       label=r'Univariate score ($-Log(p_{value})$)', color='g')

###############################################################################
# Compare to the weights of an SVM fitted on the same quality.
clf = svm.SVC(kernel='linear')
clf.fit(X, Y[0])
import numpy as np
from sklearn.feature_selection import f_classif
import LabanUtils.util as labanUtil
import LabanUtils.informationGain as ig
import mlpy

chooser = f_classif  # alternatives: ig.recursiveRanking, ig.infoGain
#splitProportion = 0.2
CMAs = ['Rachelle', 'Karen']
trainSource = CMAs[0]
testSource = CMAs[1]
withPCA = False
fs = False
#clf = AdaBoostClassifier()
#clf = svm.SVC(C=c, class_weight={1: ratio}, kernel='rbf')
tstdata, featuresNames = labanUtil.getPybrainDataSet(testSource)
trndata, _ = labanUtil.getPybrainDataSet(trainSource)
print 'Datasets were read'
X, Y = labanUtil.fromDStoXY(trndata)
X_test, Y_test = labanUtil.fromDStoXY(tstdata)
bestFeatures = open('bestFeatures.csv', 'w')
bestFeatures.write('Quality, Feature Name, Operator, F-value, p-value\n')
bestFeatures.flush()
performance = open('performance.csv', 'w')
performance.write('Quality, Precision, Recall, F1 score\n')
performance.flush()
totalF1Train = []
totalF1Test = []
totalPrecisionTest = []
totalRecallTest = []
totalCoefs = []
cs = np.logspace(-3, 5, 40)
from sklearn.linear_model import SGDClassifier
import numpy as np
from sklearn import svm
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
import LabanUtils.util as labanUtil
import LabanUtils.combinationsParser as cp
from sklearn import manifold, datasets, decomposition, ensemble, lda, random_projection

quality = 'Advance'
trainSource = 'Karen'
testSource = 'Rachelle'
trndata, featuresNames = labanUtil.getPybrainDataSet(trainSource)
#tstdata, featuresNames = labanUtil.getPybrainDataSet(trainSource)
#X_test, Y_test = labanUtil.fromDStoXY(tstdata)
X, Y = labanUtil.fromDStoXY(trndata)
qualities, combinations = cp.getCombinations()
y = Y[qualities.index(quality)]

c = 80
selectedFeaturesNum = 25
ratio = 'auto'
# loss='LR' with penalty='L1' appears to select liblinear's L1-regularized
# logistic regression through the old BaseLibLinear wrapper; dual must be
# False for an L1 penalty.
clf = svm.LinearSVC(C=c, loss='LR', penalty='L1', dual=False,
                    class_weight='auto')  # or class_weight={1: ratio}
chooser = f_classif  # alternatives: ig.infoGain, ig.recursiveRanking
anova_filter = SelectKBest(chooser, k=selectedFeaturesNum)
pipe = Pipeline([
    ('feature_selection', anova_filter),
    ('classification', clf),
])
pipe.fit(X, y)
def getXYforMultiSet(source):
    ds, featuresNames = labanUtil.getPybrainDataSet(source)
    X, Y = labanUtil.fromDStoXY(ds)
    return X, np.transpose(Y)
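# Example (hypothetical source names): per-CMA matrices for a multi-label
# learner; after the transpose, Y rows align with samples and columns with
# Laban qualities.
if __name__ == '__main__':
    X_train, Y_train = getXYforMultiSet('Karen')
    X_test, Y_test = getXYforMultiSet('Rachelle')
    print Y_train.shape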