Exemplo n.º 1
0
def PersonWorker(person):
    """Train a linear SVM for one person and report train/test AUC.

    Returns [train_auc, test_auc].
    """
    print('starting on person: ', str(person))

    #data = 40 videos x 32 alpha(csp channel)
    X_train, y_train, X_test, y_test = DL.loadPersonEpochDimRedu(
        person=person,
        featureFunc=featureFunc,
    )

    #http://stackoverflow.com/questions/26963454/lda-ignoring-n-components => only 1 feature :(
    print(np.shape(X_train))

    classifier = LinearSVC()
    classifier.fit(X_train, y_train)

    def _auc_for(features, truths):
        #binarize the 4-class predictions before computing AUC
        predicted = label_binarize(classifier.predict(features), classes=[0, 1, 2, 3])
        return UT.auc(predicted, truths)

    train_auc = _auc_for(X_train, y_train)
    test_auc = _auc_for(X_test, y_test)

    print('person: ', person,
        ' - train auc: ', str(train_auc),
        ' - test auc: ' , str(test_auc)
    )

    return [train_auc, test_auc]
Exemplo n.º 2
0
def PersonWorker(person):
    """Optimise the SelectKBest k parameter of an anova->LDA pipeline with
    leave-one-out validation on the train set, then evaluate on the test set.

    Returns [best_k, acc, tpr, tnr, fpr, fnr, auc] followed by the boolean
    feature-support mask of the final anova filter.
    """
    max_k = 4#len(X_train[0])

    #load data
    X_train, y_train, X_test, y_test = loadPerson(
            person = person,
            classFunc = valClassFunc,
            featureFunc = featureFunc,
            plots = False
    )

    def _build_pipeline(k):
        #fresh anova->LDA pipeline keeping the k best features
        anova_filter = SelectKBest(f_regression)
        lda          = LinearDiscriminantAnalysis()
        pipeline     = Pipeline([
            ('anova', anova_filter),
            ('lda', lda)
        ])
        pipeline.set_params(anova__k=k)
        return pipeline

    def _loocv_accuracy(k):
        #leave-one-out accuracy on the train set for this k value
        pipeline = _build_pipeline(k)
        K_CV = KFold(n=len(X_train),
            n_folds=len(X_train),
            random_state=17, #fixed randomseed ensure that the sets are always the same
            shuffle=False
        ) #leave out one validation
        predictions, truths = [], []
        for train_index, CV_index in K_CV: #train index here is a part of the train set
            pipeline.fit(X_train[train_index], y_train[train_index])
            predictions.extend(pipeline.predict(X_train[CV_index]))
            truths.extend(y_train[CV_index])
        return UT.accuracy(predictions, truths)

    #academic loop to optimize the k param
    #NOTE(review): range(2, max_k) never tries k == max_k — confirm the
    #original upper bound is intended.
    best_k   = 1
    best_acc = _loocv_accuracy(best_k)
    for k in range(2, max_k):
        curr_acc = _loocv_accuracy(k)
        if curr_acc > best_acc:
            best_acc = curr_acc
            best_k   = k

    #now the k param is optimized and stored in best_k

    #create classifier with the optimal k and train it on all train data
    anova_lda = _build_pipeline(best_k)
    anova_lda.fit(X_train, y_train)

    predictions = anova_lda.predict(X_test)

    acc  = UT.accuracy(predictions, y_test)
    (tpr,tnr,fpr,fnr) = UT.tprtnrfprfnr(predictions, y_test)
    auc = UT.auc(predictions, y_test)

    print('person: ', person,
        ' - k: '  , str(best_k),
        ' - acc: ', str(acc),
        ' - tpr: ' , str(tpr),
        ' - tnr: ' , str(tnr),
        ' - auc: ', str(auc),
        'used features', anova_lda.named_steps['anova'].get_support()
    )
    retArr = [best_k, acc,tpr,tnr,fpr,fnr,auc]
    retArr.extend(anova_lda.named_steps['anova'].get_support())

    return retArr
Exemplo n.º 3
0
def PersonWorker(person):
    """Optimise the C parameter of a linear SVM with leave-one-out validation
    on the train set, then evaluate the best classifier on the test set.

    Returns [acc, tpr, tnr, fpr, fnr, auc].
    """
    print('starting on person: ', str(person))

    #data = 40 videos x 32 alpha(csp channel)
    X_train, y_train, X_test, y_test, csp = DL.loadPerson(person=person,
        featureFunc = featureFunc,
        use_csp=False,
        use_median = False
    )

    def _loocv_auc(C, shuffle):
        #leave-one-out AUC on the train set for this C value
        clf = LinearSVC(C=C,random_state=40)
        K_CV = KFold(n=len(X_train), n_folds=len(X_train), random_state=17, shuffle=shuffle) #leave out one validation
        predictions, truths = [], []
        for train_index, CV_index in K_CV:
            clf.fit(X_train[train_index], y_train[train_index])
            predictions.extend(clf.predict(X_train[CV_index]))
            truths.extend(y_train[CV_index])
        return UT.auc(predictions, truths)

    #academic loop: start with C = 1, then try other C values
    #NOTE(review): the original used shuffle=False for C=1 and shuffle=True for
    #the other C values; kept as-is, but confirm this asymmetry is intended.
    best_C = 1
    best_metric = _loocv_auc(best_C, shuffle=False)
    for C in [0.01,0.03,0.1,0.3,3,10]:
        metric = _loocv_auc(C, shuffle=True)
        if metric > best_metric:
            best_metric = metric
            best_C = C

    #C param is now optimized, its value is stored in best_C

    #calculate all performance metrics on testset, using the optimal classifier
    #BUGFIX: the original refitted with the last loop value of C (10) instead
    #of best_C, discarding the optimization entirely.
    clf = LinearSVC(C=best_C,random_state=40)
    clf.fit(X_train,y_train) #fit all training data
    predictions = clf.predict(X_test)

    acc  = UT.accuracy(predictions, y_test)
    (tpr,tnr,fpr,fnr) = UT.tprtnrfprfnr(predictions, y_test)
    auc = UT.auc(predictions, y_test)

    print('person: ', person, 
        ' - acc: ', str(acc),
        ' - tpr: ' , str(tpr),
        ' - tnr: ' , str(tnr),
        ' - auc: ', str(auc)
    )

    return [acc,tpr,tnr,fpr,fnr,auc]
Exemplo n.º 4
0
def PersonWorker(person):
    """Optimise the number of CSP channel pairs for an LDA classifier with
    leave-one-out validation on the train set, then evaluate on the test set.

    Returns [best_channelPairs, acc, tpr, tnr, fpr, fnr, auc].
    """
    print('starting on person: ', str(person))

    #data = 40 videos x 32 alpha(csp channel)
    X_train, y_train, X_test, y_test, csp = DL.loadPerson(person=person,
        featureFunc = featureFunc,
        use_median=False,
        use_csp=True,
        prefilter=False
    )

    #store weights of upper CSP channel for topoplots
    csp.write_filters()

    #prior probabilities (40 videos per person)
    pos_prior = np.sum(y_train)
    neg_prior = 40 - pos_prior
    pos_prior /= float(40)
    neg_prior /= float(40)

    def _select_pairs(data, channelPairs):
        #keep the top `channelPairs` CSP channels from each end of the 32:
        #columns 0..channelPairs-1 and 31..(32-channelPairs)
        X = np.zeros((len(data), channelPairs * 2,))
        top_offset = channelPairs * 2 - 1
        for j, k in zip(range(channelPairs), range(31, 31 - channelPairs, -1)):
            X[:, j] = data[:, j]
            X[:, top_offset - j] = data[:, k]
        return X

    def _loocv_accuracy(channelPairs, shuffle):
        #leave-one-out accuracy on the train set for this channel-pair count
        X = _select_pairs(X_train, channelPairs)
        lda = LinearDiscriminantAnalysis(priors=[neg_prior, pos_prior])
        K_CV = KFold(n=len(X), n_folds=len(X), random_state=17, shuffle=shuffle) #leave out one validation
        predictions, truths = [], []
        for train_index, CV_index in K_CV:
            lda = lda.fit(X[train_index], y_train[train_index])
            predictions.extend(lda.predict(X[CV_index]))
            truths.extend(y_train[CV_index])
        return UT.accuracy(predictions, truths)

    #academic loop: start with 1 channel pair, then try 2..16
    #NOTE(review): the original used shuffle=False for the first pair and
    #shuffle=True afterwards; kept as-is — confirm this asymmetry is intended.
    best_channelPairs = 1
    best_metric = _loocv_accuracy(best_channelPairs, shuffle=False)
    for channelPairs in range(2,17):
        metric = _loocv_accuracy(channelPairs, shuffle=True)
        if metric > best_metric:
            best_metric = metric
            best_channelPairs = channelPairs

    #channel pairs are now optimized, its value is stored in best_channelPairs

    #calculate all performance metrics on testset, using the optimal classifier
    #BUGFIX: the original fitted/evaluated the final LDA on all 32 channels,
    #ignoring the optimised channel-pair selection; apply it here to both sets.
    lda = LinearDiscriminantAnalysis(priors=[neg_prior, pos_prior])
    lda = lda.fit(_select_pairs(X_train, best_channelPairs), y_train) #fit all training data
    predictions = lda.predict(_select_pairs(X_test, best_channelPairs))

    acc  = UT.accuracy(predictions, y_test)
    (tpr,tnr,fpr,fnr) = UT.tprtnrfprfnr(predictions, y_test)
    auc = UT.auc(predictions, y_test)

    print('person: ', person, 
        ' - channelPairs: ', str(best_channelPairs),
        ' - acc: ', str(acc),
        ' - tpr: ' , str(tpr),
        ' - tnr: ' , str(tnr),
        ' - auc: ', str(auc)
    )

    return [best_channelPairs, acc,tpr,tnr,fpr,fnr,auc]