示例#1
0
def _demo_cv():
    """ Smoke test of the cross validation flow on synthetic two-class data.
        Draws Gaussian samples for a positive class (label 1) and a negative
        class (label 0), shuffles them along the sample axis and runs cross
        validation twice: once through test_cv and once on a pipeline that
        is assembled here.
        Return:
            0 once both cross validation calls have finished
            """
    dim_x = 2
    num_ch = 2
    num_x_p = 2000
    num_x_n = 500

    # Arrays are laid out as (channel, sample, feature)
    x_p = 2 * np.random.randn(num_ch, num_x_p, dim_x)
    x_n = 4 + np.random.randn(num_ch, num_x_n, dim_x)
    y_p = [1] * num_x_p
    y_n = [0] * num_x_n

    x = np.concatenate((x_p, x_n), 1)
    # Concatenate the two label lists directly. Wrapping lists of different
    # lengths in np.asarray first builds a ragged array, which recent NumPy
    # versions refuse to create.
    y = np.concatenate((y_p, y_n), 0)

    # Apply one permutation to both so samples and labels stay aligned
    permutation = np.random.permutation(x.shape[1])
    x = x[:, permutation, :]
    y = y[permutation]

    # Results are not inspected; the calls only check that the flow runs
    test_cv(x, y)

    rda = RegularizedDiscriminantAnalysis()
    pca = DummyDimReduction()
    pipeline = Pipeline()
    pipeline.add(pca)
    pipeline.add(rda)
    cross_validation(x, y, pipeline)
    print('Cross Validation Flows!')

    return 0
示例#2
0
def test_cv(x, y):
    """ Runs cross validation on a dummy dimensionality reduction + RDA
        pipeline
        Args:
            x: data handed unchanged to cross_validation
            y: labels handed unchanged to cross_validation
        Return:
            whatever cross_validation returns for the assembled pipeline
            """
    classifier = RegularizedDiscriminantAnalysis()
    reducer = DummyDimReduction()

    # Stage order matters: the reduction step goes before the classifier
    model = Pipeline()
    for stage in (reducer, classifier):
        model.add(stage)

    return cross_validation(x, y, model)
示例#3
0
def train_pca_rda_kde_model(x, y, k_folds=10):
    """ Trains the Cw-PCA RDA KDE model given the input data and labels with
        cross validation and returns the model
        Args:
            x(ndarray[float]): C x N x k data array
            y(ndarray[int]): N x 1 observation (class) array
                N is number of samples k is dimensionality of features
                C is number of channels
            k_folds(int): Number of cross validation folds, used both for
                the parameter search and for the AUC evaluations
        Return:
            model(pipeline): trained likelihood model
            auc_cv: negated cost reported by cost_cross_validation_auc once
                the optimized lam / gam are set on the RDA step
            """

    # Pipeline is the model. It can be populated manually
    rda = RegularizedDiscriminantAnalysis()
    pca = ChannelWisePrincipalComponentAnalysis(var_tol=.1 ** 5,
                                                num_ch=x.shape[0])
    model = Pipeline()
    model.add(pca)
    model.add(rda)

    # Cross validate
    arg_cv = cross_validation(x, y, model=model, k_folds=k_folds)

    # Get the AUC before the regularization. The cost is negated to obtain
    # the AUC; the scores of this first pass are not needed.
    cost_init, _, _ = cost_cross_validation_auc(model, 1, x, y, arg_cv,
                                                k_folds=k_folds,
                                                split='uniform')
    auc_init = -cost_init

    # Plug the optimized parameters into the RDA step (pipeline index 1)
    lam = arg_cv[0]
    gam = arg_cv[1]
    # Arguments follow the order of the labels in the message, and the
    # backslash is escaped so the literal is a valid string.
    log.debug('Optimized val [gam:{} \\ lam:{}]'.format(gam, lam))
    model.pipeline[1].lam = lam
    model.pipeline[1].gam = gam
    cost_cv, sc_cv, y_cv = cost_cross_validation_auc(model, 1, x, y, arg_cv,
                                                     k_folds=k_folds,
                                                     split='uniform')
    auc_cv = -cost_cv

    # After finding cross validation scores do one more round to learn the
    # final RDA model
    model.fit(x, y)

    # Insert the density estimates to the model and train using the cross
    # validated scores to avoid over fitting. Observe that these scores are
    # not obtained using the final model
    # NOTE(review): x.shape[0] is the channel count per the docstring, while
    # this rule-of-thumb bandwidth is normally scaled by the number of
    # samples - confirm which one is intended before changing it.
    bandwidth = 1.06 * min(
        np.std(sc_cv), iqr(sc_cv) / 1.34) * np.power(x.shape[0], -0.2)
    model.add(KernelDensityEstimate(bandwidth=bandwidth))
    model.pipeline[-1].fit(sc_cv, y_cv)

    # Report AUC
    log.debug('AUC-i: {}, AUC-cv: {}'.format(auc_init, auc_cv))

    return model, auc_cv