def nestedCVSVM(features, labels, classifier_name, normal=None, plot=True, rbf=False):
    """Estimate SVM performance with nested leave-one-out cross-validation.

    Outer loop: every sample is held out once.  Inner loop: for the remaining
    samples, each candidate ``C`` is scored by the AUC of pooled
    leave-one-out decision values, and the best ``C`` is used to fit the
    model that scores the held-out sample.  The pooled outer decision values
    are turned into a ROC curve that is handed to ``plotROC``.

    Parameters
    ----------
    features : array
        Sample-by-feature matrix, indexed with the fold index arrays.
    labels : array
        One class label per sample.
    classifier_name :
        Passed through to ``plotROC``.
    normal : {None, "log", "scaled"}
        Optional preprocessing: natural log, or ``StandardScaler``.
    plot :
        Passed through to ``plotROC``.
    rbf : bool
        Use an RBF-kernel ``SVC`` instead of the L1-penalised ``LinearSVC``.

    Returns
    -------
    numpy.ndarray
        The ``C`` value selected in each outer fold.
    """
    n_samples = len(labels)
    poolOuter = numpy.zeros((n_samples, 2))
    Cs = numpy.zeros(n_samples)

    normal_features = features
    if normal == "log":
        normal_features = numpy.log(normal_features)
    if normal == "scaled":
        # NOTE: the scaler is fitted on all samples, including the ones that
        # are later held out, so scaling statistics leak across folds.
        normal_features = preprocessing.StandardScaler().fit_transform(normal_features)

    # Candidate C values: 0.01..0.99 in steps of 0.01, then 1..100 in steps of 1.
    # The grid does not depend on the fold, so it is built once.
    c_grid = numpy.concatenate((numpy.arange(0.01, 1.0, 0.01),
                                numpy.arange(1, 101, 1)))

    # How good is the tuned SVM at predicting each held-out sample?
    for i, (trainOuter, testOuter) in enumerate(LeaveOneOut(n_samples)):
        outerFeaturesTrain = normal_features[trainOuter]
        outerLabelsTrain = labels[trainOuter]

        # Which C gives the best inner leave-one-out AUC on the training part?
        best_auc = 0
        best_c = 0
        for c_value in c_grid:
            c_value = float(c_value)
            roc_auc = _loo_auc(outerFeaturesTrain, outerLabelsTrain, c_value, rbf)
            if roc_auc > best_auc:
                best_auc = roc_auc
                best_c = c_value
        print("C chosen for " + str(i) + ": " + str(best_c))
        Cs[i] = best_c

        bestCModel = _make_svm(best_c, rbf)
        bestCModel.fit(outerFeaturesTrain, outerLabelsTrain)
        dfOuter = bestCModel.decision_function(normal_features[testOuter])
        # The test fold holds exactly one sample; store it as a scalar.
        poolOuter[i, 0] = numpy.ravel(labels[testOuter])[0]
        poolOuter[i, 1] = dfOuter[0]

    fpr, tpr, thresholds = roc_curve(poolOuter[:, 0], poolOuter[:, 1])
    roc_auc = auc(fpr, tpr)
    plotROC(fpr, tpr, roc_auc, classifier_name, plot)
    return Cs


def _make_svm(c_value, rbf):
    """Build the SVM for one C: RBF ``SVC`` if *rbf*, else L1 ``LinearSVC``."""
    if rbf:
        return svm.SVC(kernel='rbf', C=c_value)
    return svm.LinearSVC(penalty="l1", dual=False, C=c_value)


def _loo_auc(features, labels, c_value, rbf):
    """Return the AUC of pooled leave-one-out decision values for one C."""
    pooled = numpy.zeros((len(labels), 2))
    for j, (train, test) in enumerate(LeaveOneOut(len(labels))):
        model = _make_svm(c_value, rbf)
        model.fit(features[train], labels[train])
        decision = model.decision_function(features[test])
        pooled[j, 0] = numpy.ravel(labels[test])[0]
        pooled[j, 1] = decision[0]
    fpr, tpr, _ = roc_curve(pooled[:, 0], pooled[:, 1])
    return auc(fpr, tpr)
    
    
def split_data(size, cv_model_name):
    """Return a cross-validation splitter for ``size`` samples.

    Parameters
    ----------
    size : int
        Number of samples to split.
    cv_model_name : str
        One of:

        * ``'loo'``   -- scikit-learn ``LeaveOneOut``
        * ``'kfold'`` -- scikit-learn ``KFold`` with 10 folds
        * ``'lpo'``   -- scikit-learn ``LeavePOut`` with p = 2
        * ``'LeaveOneClipOutForEachSubject'`` -- ``NUM_CLIPS`` contiguous chunks
        * ``'LeaveOneClipOutForAllSubject'``  -- ``NUM_CLIPS * number of subjects``
          contiguous chunks
        * ``'LeaveOneSubjOut'`` -- one contiguous chunk per subject

    Returns
    -------
    A scikit-learn splitter object, or a list of ``(train, test)`` index-array
    pairs for the three chunk-based schemes.

    Raises
    ------
    ValueError
        If ``cv_model_name`` is not one of the names above (previously this
        surfaced as an ``UnboundLocalError`` at the ``return``).
    """
    # http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.LeaveOneOut.html#sklearn.cross_validation.LeaveOneOut
    if cv_model_name == 'loo':
        from sklearn.cross_validation import LeaveOneOut
        return LeaveOneOut(size)

    # http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.KFold.html#sklearn.cross_validation.KFold
    if cv_model_name == 'kfold':
        from sklearn.cross_validation import KFold
        return KFold(size, 10)

    # http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.LeavePOut.html#sklearn.cross_validation.LeavePOut
    if cv_model_name == 'lpo':
        from sklearn.cross_validation import LeavePOut
        return LeavePOut(size, 2)

    # The following three schemes all leave out one contiguous chunk of
    # sample indices at a time; they differ only in the number of chunks.

    # Model per subject: leave one clip (all of its sub-segments) out.
    if cv_model_name == 'LeaveOneClipOutForEachSubject':
        return _leave_chunk_out(size, NUM_CLIPS)

    # Model over all subjects: leave one clip of one subject out.
    if cv_model_name == 'LeaveOneClipOutForAllSubject':
        return _leave_chunk_out(size, NUM_CLIPS * len(dictionaries.SUBJECTS_IDS))

    # Model over all subjects: leave one whole subject out.
    if cv_model_name == 'LeaveOneSubjOut':
        return _leave_chunk_out(size, len(dictionaries.SUBJECTS_IDS))

    raise ValueError('unknown cross-validation model: %r' % (cv_model_name,))


def _leave_chunk_out(size, n_chunks):
    """Split indices 0..size-1 into n_chunks consecutive groups; hold out each once."""
    all_idx = np.arange(size)
    return [(np.setdiff1d(all_idx, held_out), held_out)
            for held_out in np.array_split(all_idx, n_chunks)]
示例#3
0
 def fit(self):
     """Run leave-one-out evaluation of a grid-searched SVC on 'Datasetnew.csv'.

     The CSV is read without a header; NaNs are replaced via ``np.nan_to_num``.
     Column 0 is used as the label and columns 1..64 as features, which are
     passed through ``preprocessing.normalize``.  For every held-out sample a
     fresh ``GridSearchCV`` over ``parameter_candidates`` is fitted on the
     remaining samples.  Counts of correct class-1, correct class-0 and wrong
     predictions are printed; nothing is returned.
     """
     df = pd.read_csv('Datasetnew.csv', header=None)
     dataset = np.nan_to_num(np.asarray(df))
     XX = dataset[:, 1:65]
     y = dataset[:, 0]
     X = preprocessing.normalize(XX)

     correct_1 = 0
     correct_0 = 0
     wrong = 0
     for train, test in LeaveOneOut(len(y)):
         X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test]
         clf = GridSearchCV(estimator=SVC(), param_grid=parameter_candidates, n_jobs=-1)
         clf.fit(X_train, y_train)
         predict = clf.predict(X_test)
         # predict and y_test each hold the single left-out sample.
         if predict == 1 and y_test == 1:
             correct_1 = correct_1 + 1
         elif predict == 0 and y_test == 0:
             correct_0 = correct_0 + 1
         else:
             wrong = wrong + 1
     print()
     print("correct_1 %s" % correct_1)
     print("correct_0 %s" % correct_0)
     print("wrong %s" % wrong)
def get_cv_method(targets, cvmethod='10', stratified=True):
    '''
    Build a cross-validation splitter for the given targets.

    Input:
    targets   : class labels, in the same order as the rows of X
    cvmethod  : an int or a digit string giving the number of folds,
                or 'loo' for LeaveOneOut
    stratified: if True use StratifiedKFold for a numeric cvmethod,
                otherwise plain KFold

    Output:
    cv: a splitter object from sklearn.cross_validation
    '''
    n_samples = len(targets)

    # Work out whether cvmethod encodes an explicit fold count.
    n_folds = None
    if isinstance(cvmethod, int):
        n_folds = cvmethod
    elif isinstance(cvmethod, str) and cvmethod.isdigit():
        n_folds = int(cvmethod)

    if n_folds is not None:
        if stratified:
            return StratifiedKFold(targets, n_folds)
        return KFold(n_samples, n_folds)

    if cvmethod == 'loo':
        return LeaveOneOut(n_samples)

    # Anything else is coerced to an int fold count for a stratified split.
    return StratifiedKFold(targets, int(cvmethod))
 def update_estimation(self):
     """Feed every busy usage sample of the cluster into the estimation.

     For each (apps, usage) pair with at least one app and a non-idle usage,
     each app is singled out in turn and the estimation is updated with that
     app, the remaining apps and the usage rate.  The estimation is printed
     afterwards when ``self.print_estimation`` is set.
     """
     for apps, usage in self.cluster.apps_usage():
         if len(apps) == 0 or not usage.is_not_idle():
             continue
         for others, held_out in LeaveOneOut(len(apps)):
             target = apps[held_out[0]]
             peers = [apps[k] for k in others]
             self.estimation.update_app(target, peers, usage.rate())
     if self.print_estimation:
         self.estimation.print()
示例#6
0
def evalDatasets(trainSets):
    """Cross-validate a decision tree on every evaluation dataset.

    For each entry of the module-level ``evalSets`` the data is parsed with
    ``parseAndBinarize``, the rule-based predictions are scored with
    ``rulePredScores``, and a decision tree (``min_samples_leaf=4``,
    ``max_depth=4``) is evaluated with leave-one-out cross-validation.
    One tab-separated row of scores is printed per dataset.

    NOTE(review): ``trainSets`` is never read; the loop iterates the global
    ``evalSets`` -- confirm which one is intended.

    Returns:
        The module-level ``classifiers`` list, extended by one
        ``("DecTree_<dataset>", clf)`` pair per dataset.  The stored ``clf``
        is the model fitted in the last leave-one-out fold.
    """
    for inData in evalSets:
        seqs, vecs, labels = parseAndBinarize(inData)
        ggRec, ggPrec, ggF1, gcRec, gcPrec, gcF1 = rulePredScores(vecs, labels)

        cVal = LeaveOneOut(len(labels))
        cValPreds = []
        cValTests = []
        for train, test in cVal:
            X_train, X_test = vecs[train], vecs[test]
            y_train, y_test = labels[train], labels[test]
            # Alternatives tried: svm.SVC(kernel="linear", probability=True),
            # RandomForestClassifier()
            clf = tree.DecisionTreeClassifier(min_samples_leaf=4, max_depth=4)
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            cValPreds.append(y_pred[0])
            cValTests.append(y_test[0])

        clfRec, clfPrec, clfF1 = scorePreds(cValTests, cValPreds)

        row = [ggRec, ggPrec, gcRec, gcPrec, clfRec, clfPrec, clfF1]
        row = ["%0.2f" % x for x in row]
        row.insert(0, inData)
        # Function-call form prints the same text on Python 2 and Python 3.
        print("\t".join(row))

        classifiers.append(("DecTree_" + inData, clf))

        #clf = dummy.DummyClassifier()
        #clf.fit(vecs, labels)
        #classifiers.append( ("Dummy_"+inData, clf) )

    return classifiers
示例#7
0
    def test_model(self, n_folds=10, leave_one_out=False):
        """
        Cross-validate the model and report the mean fold score.

        Uses leave-one-out when ``leave_one_out`` is set, otherwise stratified
        k-fold with ``n_folds`` folds.  A fresh model from ``build_model`` is
        fitted per fold; the mean of the per-fold ``score`` values is printed
        and returned.

        For a cross-validation example, see:
        http://scikit-learn.org/stable/auto_examples/plot_roc_crossval.html
        """
        log.debug("Testing model ({} folds)".format(n_folds))
        features = self.data.data
        targets = self.data.target

        if leave_one_out:
            splitter = LeaveOneOut(len(targets))
        else:
            splitter = StratifiedKFold(targets, n_folds=n_folds)

        total = 0.0
        for train_idx, test_idx in splitter:
            fitted = self.build_model().fit(features[train_idx], targets[train_idx])
            total += fitted.score(features[test_idx], targets[test_idx])

        # One fold per sample for leave-one-out, n_folds folds otherwise.
        divisor = len(targets) if leave_one_out else n_folds
        avg_score = total / divisor

        print("Average score: {}".format(avg_score))
        return avg_score
示例#8
0
def loo(X, labels):
    """Leave-one-out evaluation of a linear SVC, saved as a confusion-matrix plot.

    Labels are integer-encoded with ``LabelEncoder``; each sample is predicted
    by a linear-kernel ``SVC`` fitted on all other samples.  The row-normalised
    confusion matrix is drawn with ``plot_confusion_matrix``, titled with the
    overall accuracy and written to '../figures/conf_matrix.pdf'.

    Parameters:
        X: sample matrix; rows are indexed with the fold index arrays.
        labels: one class label per row of X.
    """
    label_encoder = LabelEncoder()
    int_labels = label_encoder.fit_transform(labels)
    print(int_labels)

    clf = SVC(kernel='linear')  #, probability=True)
    folds = LeaveOneOut(X.shape[0])

    silver, gold = [], []  # predicted / true encoded labels
    for train, test in folds:
        print('.')
        X_train, X_test = X[train], X[test]
        y_test = [int_labels[i] for i in test]
        y_train = [int_labels[i] for i in train]
        clf.fit(X_train, y_train)
        pred = clf.predict(X_test)
        silver.append(pred[0])
        gold.append(y_test[0])

    info = 'Accuracy after SVC-LOO:' + str(accuracy_score(silver, gold))

    # confusion matrix
    plt.clf()
    T = label_encoder.inverse_transform(gold)
    P = label_encoder.inverse_transform(silver)
    cm = confusion_matrix(T, P, labels=label_encoder.classes_)
    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    np.set_printoptions(precision=2)
    # Use pyplot directly: the ``sns.plt`` alias is not part of seaborn's API
    # and is absent from current releases.
    plt.figure()
    plot_confusion_matrix(cm_normalized, target_names=label_encoder.classes_)
    plt.title(info)
    plt.savefig('../figures/conf_matrix.pdf')
示例#9
0
def leaveOneOut_error(Y, X):
    """
    Evaluate a negative-binomial GLM (statsmodels) with leave-one-out CV.

    A constant column is appended to X.  For every held-out sample a GLM is
    fitted on the remaining samples; the absolute error on the held-out
    sample is collected.  The average training error (absolute, and relative
    to the training mean) over all folds is printed.

    Input:
    Y - target values, one per sample
    X - feature matrix, one row per sample

    Output:
    A 4-tuple:
      mean absolute test error,
      mean of Y,
      mean of the per-sample relative errors |error_i| / Y_i,
      mean absolute test error divided by the mean of Y
    """
    errors = []
    errs_train = np.zeros(2)
    loo = LeaveOneOut(len(Y))
    X = sm.add_constant(X, prepend=False)
    for train_idx, test_idx in loo:
        X_train, Y_train = X[train_idx], Y[train_idx]
        # Train NegativeBinomial Model from statsmodels library
        glm = sm.GLM(Y_train, X_train, family=sm.families.NegativeBinomial())
        nbm = glm.fit()
        ybar = nbm.predict(X[train_idx])
        er_train = np.mean(np.abs(ybar - Y[train_idx]))
        errs_train += er_train, er_train / np.mean(Y[train_idx])
        ybar = nbm.predict(X[test_idx])
        errors.append(np.abs(ybar - Y[test_idx]))

    print(errs_train / len(Y))

    # Flatten to 1-D so the division below is element-wise.  Dividing the raw
    # list of length-1 arrays (shape (n, 1)) by Y (shape (n,)) broadcasts to an
    # (n, n) matrix and does not give the mean relative error.
    # Assumes the folds hold out samples in order 0..n-1, so errors[i] pairs
    # with Y[i].
    errors = np.ravel(errors)
    targets = np.ravel(Y)
    return (np.mean(errors), np.mean(targets),
            np.mean(errors / targets), np.mean(errors) / np.mean(targets))
示例#10
0
def loo_regressions(xs, ys, ft, dt, mt):
    """Predict every column of ``ys`` from ``xs`` with leave-one-out ElasticNet.

    ``xs`` is restricted to the columns of ``ys``, columns containing NaN are
    dropped, and both frames are transposed so that rows are the shared
    samples.  For each target column an ``ElasticNet(alpha=0.01)`` is fitted
    per fold; held-out predictions and the per-feature median coefficient
    across folds are collected.

    Parameters:
        xs, ys: pandas DataFrames sharing column labels.
        ft, dt, mt: tags copied verbatim into the log lines and the results.

    Returns:
        ``(correlations, (ft, dt, mt, y_betas))`` where ``correlations`` is a
        list of ``(ft, dt, name, mt, kind, r)`` tuples -- one per target column
        (kind ``'metabolites'``) and one per sample row (kind ``'conditions'``)
        -- with ``r`` the first element returned by ``pearson``, and
        ``y_betas`` maps each target to its median coefficients.

    NOTE(review): this relies on the pandas ``.ix`` indexer, which recent
    pandas releases no longer provide.  Whether each use is positional or
    label-based depends on the index type, so it is left unchanged here.
    """
    # Function-call form with one pre-formatted string prints the same text
    # on Python 2 and Python 3.
    print('[INFO] %s %s' % (ft, dt))

    # Align matrices
    x = xs.loc[:, ys.columns].dropna(axis=1).T
    y = ys[x.index].T

    # Define cross-validation
    cv = LeaveOneOut(len(y))

    # Run regressions
    y_pred, y_betas = {}, {}
    for m in y:
        y_pred[m] = {}

        betas = []
        for train, test in cv:
            lm = ElasticNet(alpha=0.01).fit(x.ix[train], y.ix[train, m])
            y_pred[m][x.index[test][0]] = lm.predict(x.ix[test])[0]

            betas.append(dict(zip(*(x.columns, lm.coef_))))

        y_betas[m] = DataFrame(betas).median().to_dict()

    y_pred = DataFrame(y_pred).ix[y.index, y.columns]
    print('[INFO] Regression done:  %s %s' % (ft, dt))

    # Perform correlation with predicted values
    metabolites_corr = [(ft, dt, f, mt, 'metabolites',
                         pearson(y[f], y_pred[f])[0]) for f in y_pred]
    conditions_corr = [(ft, dt, s, mt, 'conditions',
                        pearson(y.ix[s], y_pred.ix[s])[0])
                       for s in y_pred.index]

    return (metabolites_corr + conditions_corr), (ft, dt, mt, y_betas)
示例#11
0
def run_analysis_pipeline(data, panel_size, output_file_name=None):
    """Run ``outer_cv_fold`` on every leave-one-out split, in a process pool.

    The last column of ``data`` is taken as the label and all preceding
    columns as features.  Each leave-one-out split is handed to
    ``outer_cv_fold`` (with features, labels, feature names and
    ``panel_size`` bound as keyword arguments) through a pool of 10 workers.

    When ``output_file_name`` is given, the result list is additionally
    written to that file with ``dump``.

    Returns the list of per-fold results.
    """
    log.debug("Panel size %d", panel_size)

    sample_count = data.shape[0]
    feature_count = data.shape[1] - 1
    column_names = list(data)[0:feature_count]
    feature_matrix = numpy.array(data.ix[:, :feature_count])
    label_vector = numpy.array(data.ix[:, feature_count])

    fold_runner = functools.partial(outer_cv_fold,
                                    features=feature_matrix,
                                    labels=label_vector,
                                    feature_labels=column_names,
                                    panel_size=panel_size)

    with Pool(10) as workers:
        results = workers.map(fold_runner, LeaveOneOut(sample_count))

    if output_file_name is None:
        return results

    with open(output_file_name, 'wb') as sink:
        dump(list(results), sink)
    log.debug("Panel size %d results saved", panel_size)

    return results
示例#12
0
def knn(X, Y):
    """Grid-search a PCA + k-NN pipeline with leave-one-out CV and plot the scores.

    The grid covers 1..10 neighbours (distance-weighted) and 5..30 PCA
    components in steps of 5.  The mean validation score of every grid point
    is drawn as a 3-D surface over (PCA components, neighbours); the best
    parameters and best score are printed.

    Parameters:
        X: samples, passed to ``GridSearchCV.fit``.
        Y: targets, passed to ``GridSearchCV.fit``.
    """
    neighbors = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    weights = ['distance']
    n_components = [5, 10, 15, 20, 25, 30]

    parameters = [{
        'k_nn__n_neighbors': neighbors,
        'k_nn__weights': weights,
        'pca__n_components': n_components
    }]

    lv = LeaveOneOut(len(X))

    pipeline = Pipeline([
        ('pca', PCA()),
        ('k_nn', KNeighborsClassifier()),
    ])

    clf = GridSearchCV(pipeline, parameters, cv=lv)
    clf.fit(X, Y)

    # Each grid_scores_ entry is (parameters, mean validation score, ...).
    accuracy = [p[1] for p in clf.grid_scores_]
    pca_components = [p[0]['pca__n_components'] for p in clf.grid_scores_]
    knn_neighbors = [p[0]['k_nn__n_neighbors'] for p in clf.grid_scores_]

    # Grid points are enumerated with the parameter names sorted and the last
    # name varying fastest, so 'pca__n_components' is the inner axis.  Reshape
    # to one row per neighbour/weight setting and one column per component
    # count so that plot_surface receives a proper 2-D mesh.
    grid_shape = (-1, len(n_components))
    accuracy = np.reshape(accuracy, grid_shape)
    pca_components = np.reshape(pca_components, grid_shape)
    knn_neighbors = np.reshape(knn_neighbors, grid_shape)

    fig = plt.figure()

    ax = fig.gca(projection='3d')
    surf = ax.plot_surface(pca_components,
                           knn_neighbors,
                           accuracy,
                           rstride=1,
                           cstride=1,
                           cmap=cm.coolwarm,
                           linewidth=0,
                           antialiased=False)
    #ax.set_zlim(-1.01, 1.01)
    ax.zaxis.set_major_locator(LinearLocator(10))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))

    fig.colorbar(surf, shrink=0.5, aspect=5)

    ax.set_xlabel('Number of PCA components')
    ax.set_ylabel('Number of neigbours')
    ax.set_zlabel('Accuracy')

    plt.show()

    # Function-call form prints the same text on Python 2 and Python 3.
    print("Best Parameters: {}".format(clf.best_params_))
    print("Accuracy: {}".format(clf.best_score_))
示例#13
0
def OutlierDetector(TrainX, TrainY):
    """Drop the 20% of samples that a leave-one-out linear regression predicts worst.

    Each sample is predicted by a ``LinearRegression`` fitted on all other
    samples; the samples with the smallest absolute errors (80% of the data,
    rounded) are kept.

    Parameters:
        TrainX: pandas object holding the features, indexed positionally via ``.iloc``.
        TrainY: pandas object holding the targets, indexed positionally via ``.iloc``.

    Returns:
        ``(TrainX, TrainY)`` restricted to the retained rows, ordered by
        increasing leave-one-out error.
    """
    Reg = linear_model.LinearRegression()
    n_samples = len(TrainY)
    MAE = np.zeros(n_samples)

    for fold, (train, test) in enumerate(LeaveOneOut(n_samples)):
        Reg.fit(TrainX.iloc[train, :], TrainY.iloc[train])
        TestOutput = Reg.predict(TrainX.iloc[test, :])
        # One held-out sample per fold: compare plain scalars rather than
        # storing a one-element pandas object into the array slot.
        predicted = np.ravel(TestOutput)[0]
        actual = np.ravel(TrainY.iloc[test])[0]
        MAE[fold] = np.absolute(predicted - actual)

    # round() yields a float on Python 2; slice bounds must be integers.
    n_keep = int(round(n_samples * 0.8))
    Good = MAE.argsort()[:n_keep]
    TrainX = TrainX.iloc[Good, :]
    TrainY = TrainY.iloc[Good]
    return TrainX, TrainY
示例#14
0
 def fit(self):
     """Run leave-one-out evaluation of ``self.clf`` on 'Datasetnew.csv'.

     The CSV is read without a header; NaNs are replaced via ``np.nan_to_num``.
     Column 0 is used as the label and columns 1..64 as features, which are
     passed through ``preprocessing.normalize``.  ``self.clf`` is refitted for
     every held-out sample.  Counts of correct class-1, correct class-0 and
     wrong predictions are printed; nothing is returned.
     """
     df = pd.read_csv('Datasetnew.csv', header=None)
     dataset = np.nan_to_num(np.asarray(df))
     XX = dataset[:, 1:65]
     y = dataset[:, 0]
     X = preprocessing.normalize(XX)

     correct_1 = 0
     correct_0 = 0
     wrong = 0
     for train, test in LeaveOneOut(len(y)):
         X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test]
         self.clf.fit(X_train, y_train)
         predict = self.clf.predict(X_test)
         # predict and y_test each hold the single left-out sample.
         if predict == 1 and y_test == 1:
             correct_1 = correct_1 + 1
         elif predict == 0 and y_test == 0:
             correct_0 = correct_0 + 1
         else:
             wrong = wrong + 1
     print()
     print("correct_1 %s" % correct_1)
     print("correct_0 %s" % correct_0)
     print("wrong %s" % wrong)
    def test_mvl_fuse_function(self):
        """Leave-one-out check of the multi-view model with linear fusion.

        Generates raw samples, appends a constant column to each of the four
        views, and for every held-out sample fits the four view models plus
        the fusion model on the remaining samples.  The fused prediction error
        is collected and the resulting mean relative error
        (mean |error| / mean Y) is printed.
        """
        Y, D, P, T, G = generate_raw_samples()
        T = sm.add_constant(T, prepend=False)
        P = sm.add_constant(P, prepend=False)
        D = sm.add_constant(D, prepend=False)
        G = sm.add_constant(G, prepend=False)
        loo = LeaveOneOut(len(Y))
        er = []
        for train_idx, test_idx in loo:
            tm = taxi_view_model(train_idx, Y, T)
            pm = poi_view_model(train_idx, Y, P)
            gm = geo_view_model(train_idx, Y, G)
            dm = demo_view_model(train_idx, Y, D)
            models = [tm, pm, gm, dm]
            lm = mvl_fuse_function(models, train_idx, Y)

            # Element 0 of each view-model result is the fitted predictor.
            tm_test = tm[0].predict(T[test_idx])
            pm_test = pm[0].predict(P[test_idx])
            gm_test = gm[0].predict(G[test_idx])
            dm_test = dm[0].predict(D[test_idx])

            # Fusion input: a leading 1 followed by the four view predictions.
            newX_test = np.array([1, tm_test, pm_test, gm_test, dm_test])
            ybar = lm.predict(newX_test)
            y_error = ybar - Y[test_idx]
            er.append(y_error)
        mre = np.mean(np.abs(er)) / np.mean(Y)
        # Function-call form prints the same text on Python 2 and Python 3.
        print("MVL with linear fusion function MRE: {0}".format(mre))
示例#16
0
    def _train_clf(self, X, y, n_estimators=10):
        """Fit a random forest on (X, y), optionally cross-validating it first.

        When ``self.cv_method`` is not None the forest is scored with
        ``cross_val_score`` using 'neg_log_loss'; "LOO" selects leave-one-out,
        "SKFold" selects stratified k-fold, and any other value passes
        ``cv=None``.  Without cross-validation the scores default to a single
        zero.

        Returns ``(fitted classifier, mean score, score standard deviation)``.
        """
        forest = RandomForestClassifier(n_estimators,
                                        n_jobs=self.threads,
                                        class_weight=self.class_weights)
        scores = np.array([0])

        if self.cv_method is not None:
            splitter = None
            if self.cv_method == "LOO":
                splitter = LeaveOneOut(len(y))
            elif self.cv_method == "SKFold":
                splitter = StratifiedKFold(y)

            logger.info("Running cross-validation...")
            cv_options = dict(cv=splitter,
                              scoring='neg_log_loss',
                              n_jobs=self.threads,
                              verbose=1)
            scores = model_selection.cross_val_score(forest, X, y, **cv_options)

        forest.fit(X, y)
        return forest, scores.mean(), scores.std()
示例#17
0
 def local_homography_loocv_error(theta, args):
     """Return the mean leave-one-out ``local_homography_error`` for ``theta``.

     ``args`` is a ``(src, tgt)`` pair; each split passes the training and
     held-out slices of both to ``local_homography_error``.
     """
     src, tgt = args
     fold_errors = []
     for fit_ix, held_ix in LeaveOneOut(len(src)):
         fold_errors.append(
             local_homography_error(theta, src[fit_ix], tgt[fit_ix],
                                    src[held_ix], tgt[held_ix]))
     return np.mean(fold_errors)
示例#18
0
def calBestBandwidth(data):
    """Pick a Gaussian KDE bandwidth for 1-D ``data`` by leave-one-out grid search.

    100 candidates, log-spaced between 10**-1 and 10**1, are tried.
    Returns the ``"bandwidth"`` entry of the best parameter set.
    """
    candidates = np.power(10, np.linspace(-1, 1, 100))
    estimator = KernelDensity(kernel="gaussian")
    search = GridSearchCV(estimator,
                          {"bandwidth": candidates},
                          cv=LeaveOneOut(len(data)))
    # The estimator expects a column vector.
    search.fit(data[:, None])
    return search.best_params_.get("bandwidth")
示例#19
0
 def score(model, X, y):
     """Return the mean leave-one-out cross-validation score of ``model``.

     The metric comes from the enclosing-scope name ``scoring``; folds are
     evaluated with ``n_jobs=-1``.
     """
     splitter = LeaveOneOut(X.shape[0])
     fold_scores = cross_val_score(model,
                                   X,
                                   y,
                                   cv=splitter,
                                   scoring=scoring,
                                   n_jobs=-1)
     return np.mean(fold_scores)
示例#20
0
    def test_KCSD2D_cross_validation_five_electrodes(self):
        """Cross-validated lambda chosen from 100/2**n (n = 1..19) must exceed 25."""
        candidate_lambdas = np.array([100.0 / 2**power for power in range(1, 20)])
        electrode_count = self.k.elec_pos.shape[0]
        folds = LeaveOneOut(electrode_count)  #, indices=True)
        chosen = cv.choose_lambda(candidate_lambdas, self.k.sampled_pots,
                                  self.k.k_pot, self.k.elec_pos, folds)
        self.k.lambd = chosen

        self.assertGreater(self.k.lambd, 25.0)
示例#21
0
def CV_determination(Y, Method):
    """Return a splitter for ``Y`` according to the ``Method`` string.

    ``'loo'`` selects leave-one-out.  Any other value must contain a '-';
    the text before the first '-' is parsed as the number of folds for a
    shuffled ``KFold`` with ``random_state=1``.
    """
    from sklearn.cross_validation import KFold, LeaveOneOut

    if Method == 'loo':
        return LeaveOneOut(len(Y))

    dash_positions = [pos for pos, char in enumerate(Method) if char == '-']
    n_folds = int(Method[:dash_positions[0]])
    return KFold(len(Y), n_folds, shuffle=True, random_state=1)
示例#22
0
def getErrorAcrossDays(normedDays, period, phase, gamma):
    """Return one held-out error per day using leave-one-day-out fitting.

    For each day, the remaining days are merged with ``appendUnique``,
    turned into model inputs with ``getParams``, fitted with ``fitModel``
    for the single ``gamma``, and scored on the held-out day with
    ``getError``.
    """
    day_array = array(normedDays)
    per_day_errors = []
    for fit_idx, held_idx in LeaveOneOut(len(day_array)):
        pooled = appendUnique(day_array[fit_idx])
        held_out_day = day_array[held_idx][0]
        t_ext, series_ext = getParams(pooled, phase, period)
        fitted_by_gamma = fitModel(t_ext, series_ext, [gamma])
        model = fitted_by_gamma[gamma]['model']
        per_day_errors.append(getError(model, held_out_day, period, phase))
    return per_day_errors
示例#23
0
    def test_application(self):
        """``optimize_delta_bisection`` yields a positive delta for LOO and 5-fold schemas."""
        from scot.var import VAR
        from sklearn.cross_validation import LeaveOneOut, KFold
        np.random.seed(42)
        x = np.random.randn(10, 3, 15)

        schemas = (
            lambda n, _: LeaveOneOut(n),
            lambda n, _: KFold(n, 5),
        )
        for schema in schemas:
            var = VAR(3, xvschema=schema).optimize_delta_bisection(x)
            self.assertGreater(var.delta, 0)
示例#24
0
def validate_each(known, model):
    """Write leave-one-out predictions into ``known`` in place.

    For every row, ``model`` is fitted on the other rows (columns ``XCOLS``
    -> ``YCOLS``) and the first two predicted outputs for the held-out row
    are stored in its 'pred_lat' and 'pred_lon' columns.
    """
    for fit_rows, held_rows in LeaveOneOut(len(known)):
        fit_frame = known.iloc[fit_rows, :]
        fit_x = fit_frame.loc[:, XCOLS]
        fit_y = fit_frame.loc[:, YCOLS]
        held_frame = known.iloc[held_rows, :]
        held_x = held_frame.loc[:, XCOLS]

        model.fit(fit_x, fit_y)
        prediction = model.predict(held_x)

        held_labels = held_frame.index
        known.loc[held_labels, 'pred_lat'] = prediction[0][0]
        known.loc[held_labels, 'pred_lon'] = prediction[0][1]
def score_spatial_model(X, label, cv=None, two_level=False, null=False):
    """Give a score to a data labelling with/without cross-validation

    Parameters
    ==========
    X: array of shape(n_voxels, n_subjects) the data to be parcelled
    label: array of shape (n_voxels) an index array describing the parcellation
    cv: string, optional,
         evaluation scheme, one of
         (None, 'll', 'bic', 'log_lr', 'loo', 'kfold')
    two_level: bool, optional,
               whether a one-or two level variance partition scheme is used
    null: bool, optional
          whether the likelihood is estimated under H0 (mu=0) or not

    Returns
    =======
    score: float, the summed score of the data under the parcellation:
           log-likelihood for None/'ll', BIC for 'bic', log-likelihood
           difference between the non-null and null fits for 'log_lr', and
           held-out log-likelihood summed over parcels and folds for
           'loo'/'kfold'

    Raises
    ======
    ValueError: if cv is not one of the schemes listed above
    """
    from sklearn.cross_validation import LeaveOneOut, KFold
    score = 0

    if cv in ['bic', 'll', None]:
        ll, _, _, _, bic = parameter_map(X, label, two_level, null)
        if cv == 'bic':
            score = bic.sum()
        else:
            score = ll.sum()
    elif cv == 'log_lr':
        ll1, _, _, _, _ = parameter_map(X, label, two_level, False)
        ll2, _, _, _, _ = parameter_map(X, label, two_level, True)
        score = ll1.sum() - ll2.sum()
    elif cv in ['loo', 'kfold']:
        n_subjects = X.shape[1]
        if cv == 'loo':
            folds = LeaveOneOut(n_subjects)
        else:
            folds = KFold(n_subjects, min(10, n_subjects))
        # Under H0 the mean is fixed at 0; otherwise it is estimated.
        mu_init = 0 if null else None
        for k in np.unique(label):
            # The parcel's rows do not depend on the fold: select them once.
            parcel = X[label == k]
            for (train, test) in folds:
                mu, sigma1, sigma2, _ = em_inference_regular(
                    parcel[:, train], two_level=two_level, mu=mu_init)
                score += log_likelihood_regular(parcel[:, test],
                                                mu,
                                                sigma1,
                                                sigma2,
                                                two_level=two_level)
    else:
        raise ValueError(
            'unknown keyword from evaluation scheme (cv argument)')
    return score
示例#26
0
def loo_cv(X_train,y_train,clf):
    """Print the leave-one-out accuracy of ``clf`` on the training data.

    ``clf`` is refitted on every fold.  The per-sample accuracy (0 or 1,
    computed on integer-cast labels) is stored and the mean with its
    standard error is printed.  Nothing is returned.
    """
    n_samples = X_train.shape[0]
    loo = LeaveOneOut(n_samples)
    scores = np.zeros(n_samples)
    for train_index, test_index in loo:
        X_train_cv, X_test_cv = X_train[train_index], X_train[test_index]
        y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]
        clf = clf.fit(X_train_cv, y_train_cv)
        y_pred = clf.predict(X_test_cv)
        scores[test_index] = metrics.accuracy_score(y_test_cv.astype(int), y_pred.astype(int))
    # Format before printing: calling .format() on the result of print()
    # fails on Python 3, where print() returns None.
    print("Mean score: {0:.3f} (+/-{1:.3f})".format(np.mean(scores), sem(scores)))
def rbf_analysis(X, Y, c, g, title, filename):
    """Leave-one-out evaluation of an RBF SVC; append a report and plot the result.

    Parameters:
        X, Y: samples and labels, indexed with the fold index arrays.  The
            TP/FN/TN/FP counts treat 1 as positive and 0 as negative.
        c: SVC penalty ``C``.
        g: SVC ``gamma``.
        title: passed to ``plot_confusion_matrix``.
        filename: report file; results are appended to it.

    Sensitivity, specificity, accuracy, the Matthews correlation coefficient,
    the classification report and the confusion matrix are appended to
    ``filename``; the row-normalised confusion matrix is then drawn with
    ``plot_confusion_matrix``.  Ratios with an empty denominator are
    reported as NaN.
    """
    print("Performing Cross Validation on Penalty: {}".format(c))
    predictions = []
    expected = []
    for train_index, test_index in LeaveOneOut(len(X)):
        clf = SVC(C=c, gamma=g, kernel='rbf')
        clf.fit(X[train_index], Y[train_index])
        predictions.append(clf.predict(X[test_index])[0])
        expected.append(Y[test_index][0])

    print("Calculating.....")
    TP, FN, TN, FP = 0, 0, 0, 0
    for prediction, truth in zip(predictions, expected):
        if prediction == 1 and truth == 1:
            TP += 1
        elif prediction == 0 and truth == 1:
            FN += 1
        elif prediction == 0 and truth == 0:
            TN += 1
        elif prediction == 1 and truth == 0:
            FP += 1

    def ratio(numerator, denominator):
        # Avoid ZeroDivisionError when a class never occurs.
        return numerator / float(denominator) if denominator else float('nan')

    Sensitivity = ratio(TP, TP + FN)
    Specificity = ratio(TN, TN + FP)
    Accuracy = ratio(TP + TN, TP + TN + FP + FN)

    # scikit-learn metrics take (y_true, y_pred): with that order the
    # confusion-matrix rows are the true classes and the report's
    # precision/recall are not swapped.
    cm = confusion_matrix(expected, predictions)

    # Saving data to file.  Text append mode: the report is written as str.
    with open(filename, 'a') as f:
        f.write("Sensitivity of Prediction: {} @ Penalty: {} @ Gamma: {}\n".format(Sensitivity, c, g))
        f.write("Specificity of Prediction: {} @ Penalty: {} @ Gamma: {}\n".format(Specificity, c, g))
        f.write("Accuracy of Prediction: {} @ Penalty: {} @ Gamma: {}\n".format(Accuracy, c, g))
        f.write("Matthews Correlation Coeefficient Value: {}\n".format(matthews_corrcoef(expected, predictions)))
        f.write("Classification Report:\n")
        f.write(classification_report(expected, predictions))
        f.write("Confusion Matrix\n")
        f.write(str(cm))

    # Normalise each true-class row to sum to 1 for plotting.
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    label1 = "Negative"
    label2 = "Positive"

    plt.figure()
    plot_confusion_matrix(cm, title, label1, label2)
示例#28
0
def loo_cv(X_train, y_train, clf):
    """Print the leave-one-out accuracy of ``clf`` on the training data.

    The per-sample accuracy (0 or 1, computed on integer-cast labels) is
    stored for every fold and the mean with its standard error is printed.
    Nothing is returned.
    """
    n_samples = X_train.shape[0]  # number of rows
    loo = LeaveOneOut(n_samples)
    scores = np.zeros(n_samples)
    for train_index, test_index in loo:
        X_train_cv, X_test_cv = X_train[train_index], X_train[test_index]
        y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]
        # NOTE(review): fitting goes through ``clf.clf`` while prediction uses
        # ``clf`` directly -- presumably ``clf`` wraps an estimator; confirm
        # this is not a typo for ``clf.fit``.
        clf.clf.fit(X_train_cv, y_train_cv)
        y_pred = clf.predict(X_test_cv)
        scores[test_index] = metrics.accuracy_score(y_test_cv.astype(int),
                                                    y_pred.astype(int))
    # The previous template "{+/-{1:.3f}}" is not a valid replacement field
    # and made str.format raise; "npp" was an undefined name.
    print("Mean score: {0:.3f} (+/-{1:.3f})".format(np.mean(scores),
                                                    sem(scores)))
 def loadVideos(self):
     """
     Load the mask videos, extract features and train the HMM models.

     Reads 'original_masks' from 'data/original_masks.mat'.  For every
     category the features of each person's video are collected (person
     'lena_' has two takes, suffixed '1' and '2', for 'run', 'skip' and
     'walk').  One model is trained on all videos of the category and stored
     in ``self.fullDataTrainHmm``; one model per leave-one-out split is
     stored in ``self.model[category]`` together with its two scalers and
     the held-out data.
     """
     masks = sio.loadmat('data/original_masks.mat')
     masks = masks['original_masks']

     def features_of(key):
         # Feature type is selected by the ``mhi`` flag on self.args.
         clip = masks[key][0][0]
         if self.args.mhi:
             return self.extractMhiFeature(clip)
         return self.extractFeature(clip)

     two_take_categories = ('run', 'skip', 'walk')

     for category_name in self.categories:
         images = []
         for person in self.persons:
             if person == 'lena_' and category_name in two_take_categories:
                 # Lena recorded these categories twice.
                 keys = [person + category_name + '1',
                         person + category_name + '2']
             else:
                 keys = [person + category_name]
             for key in keys:
                 images.append(features_of(key))

         if len(images) != 0:
             loo = LeaveOneOut(len(images))
             images = np.array(images)

             # Model trained on every video of this category.
             full_model, _, _ = self.train(images)
             self.fullDataTrainHmm[category_name] = full_model

             per_fold = {
                 'hmm': [],
                 'std_scale': [],
                 'std_scale1': [],
                 'data': [],
             }
             self.model[category_name] = per_fold
             for train, test in loo:
                 markov_model, std_scale, std_scale1 = self.train(
                     images[train])
                 per_fold['hmm'].append(markov_model)
                 per_fold['std_scale'].append(std_scale)
                 per_fold['std_scale1'].append(std_scale1)
                 per_fold['data'].append(images[test])
         self.target_names = self.categories
示例#30
0
def loo_cv(X_train, Y_train, clf):
    """Print the leave-one-out accuracy of ``clf`` on the training data.

    ``clf`` is refitted on every fold.  The per-sample accuracy (0 or 1,
    computed on integer-cast labels) is stored and, once all folds are done,
    the mean with its standard error is printed.  Nothing is returned.
    """
    n_samples = X_train.shape[0]
    loo = LeaveOneOut(n_samples)
    scores = np.zeros(n_samples)
    for train_index, test_index in loo:
        X_train_cv, X_test_cv = X_train[train_index], X_train[test_index]
        Y_train_cv, Y_test_cv = Y_train[train_index], Y_train[test_index]
        clf = clf.fit(X_train_cv, Y_train_cv)
        Y_pred = clf.predict(X_test_cv)
        # NOTE(review): the original author questioned whether the
        # astype(int) cast is appropriate here.
        scores[test_index] = metrics.accuracy_score(
            Y_test_cv.astype(int), Y_pred.astype(int))
    # Report once, after the loop: a mean taken while later folds are still
    # zero-initialised is not meaningful.  The string is formatted before
    # printing because print() returns None on Python 3.
    print("Loo_cv mean score: {0:.3f} (+/-{1:.3f})".format(
        np.mean(scores), sem(scores)))
def rbf(X, Y):
    """Grid-search ``C`` and ``gamma`` of an RBF SVC with leave-one-out CV.

    Prints the best parameters and the best score.

    Parameters:
        X, Y: samples and labels, passed to ``GridSearchCV.fit``.

    Returns:
        ``clf.best_params_``, the best parameter set found.
    """
    # Performing Grid Search for Parameter Selection
    C = [1, 2, 5, 10, 15, 20, 25, 30, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
    # The last entry used to read "0,000005" (comma instead of a decimal
    # point), which put the two values 0 and 000005 into the grid.
    gamma = [0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001,
             0.5, 0.05, 0.005, 0.0005, 0.00005, 0.000005]
    parameters = [{'kernel': ['rbf'], 'gamma': gamma, 'C': C}]
    estimator = SVC()
    lv = LeaveOneOut(len(X))
    clf = GridSearchCV(estimator, parameters, cv=lv)
    clf.fit(X, Y)
    print("Best Params for RBF: {}".format(clf.best_params_))
    print("Accuracy: {}".format(clf.best_score_))
    return clf.best_params_