Пример #1
0
def experiment_schizophrenia_data(data_path='data',
                                  n_folds=5,
                                  iterations=10000,
                                  verbose=True,
                                  plot=True,
                                  random_state=None):
    """Run the experiments on the Schizophrenia dataset.

    The dataset is loaded once and ``simple_experiment`` is then run on
    four kernel matrices, in this order: DCE, DR, WL and SP.  The DCE and
    DR embeddings are turned into kernels with
    ``compute_rbf_kernel_matrix``; the outputs of ``WL_K_embedding`` and
    ``SP_K_embedding`` are passed to ``simple_experiment`` directly.

    Parameters:
    ----------
    data_path: string
        Path to the folder containing the dataset.
    n_folds: int
        The number of folds in a StratifiedKFold cross-validation
    iterations: int
        Number of iterations to compute the null distribution of
        balanced_accuracy and MMD^2_u
    verbose: bool
        Whether to print progress messages; also forwarded to the data
        loader and to ``simple_experiment``.
    plot: bool
        Whether to plot the results of the statistical tests.
    random_state:
        Forwarded unchanged to every ``simple_experiment`` call.

    """
    name = 'Schizophrenia'
    if verbose:
        # Single-argument print(...) behaves identically as a Python 2
        # statement and as a Python 3 function call.
        print('\nWorking on %s dataset...' % name)
        print('-----------------------')
    X, y = load_schizophrenia_data(data_path, verbose=verbose)

    # Second positional argument of the WL / SP kernel helpers
    # (presumably a threshold -- confirm against those helpers).
    th = 0.2

    # (label shown to the user, data_name suffix, lazy kernel builder).
    # The builders are lambdas so that each kernel is only computed after
    # its banner has been printed, exactly as in a hand-unrolled version.
    pipelines = [
        ('DCE_Embedding', '_dce',
         lambda: compute_rbf_kernel_matrix(DCE_embedding(X))),
        ('DR_Embedding', '_dr',
         lambda: compute_rbf_kernel_matrix(DR_embedding(X))),
        ('WL_K_Embedding', '_wl',
         lambda: WL_K_embedding(X, th)),
        ('SP_K_Embedding', '_sp',
         lambda: SP_K_embedding(X, th)),
    ]

    for label, suffix, build_kernel in pipelines:
        if verbose:
            print('\n### Results for %s ###' % label)

        simple_experiment(build_kernel(),
                          y,
                          n_folds=n_folds,
                          iterations=iterations,
                          verbose=verbose,
                          data_name=name + suffix,
                          plot=plot,
                          random_state=random_state)
Пример #2
0
def _permutation_null_scores(K, permuted_labels, n_folds, param_grid):
    """Score kernel ``K`` against every permuted label vector, in parallel.

    Returns the scores as a NumPy array so that later element-wise
    comparisons do not depend on the scalar type of the observed score.
    """
    scores = Parallel(n_jobs=-1)(
        delayed(compute_svm_score_nestedCV)(K,
                                            y_perm,
                                            n_folds,
                                            scoring=balanced_accuracy_scoring,
                                            param_grid=param_grid)
        for y_perm in permuted_labels)
    return np.asarray(scores)


def _empirical_p_value(null_scores, score, iterations):
    """Fraction of null scores strictly above ``score``, floored at 1/iterations."""
    return max(1.0 / iterations,
               (null_scores > score).sum() / float(iterations))


def _dump_results(path, null_scores, accuracies, p_values):
    """Pickle one embedding's results to ``path`` and close the file."""
    results = {
        'null_distribution': np.array(null_scores),
        'accuracies': accuracies,
        'p_values': p_values,
    }
    with open(path, 'wb') as handle:
        pickle.dump(results, handle)


def check_instability_classification(X,
                                     y,
                                     location='',
                                     n_folds=5,
                                     iterations=10000,
                                     verbose=True,
                                     reps=100,
                                     seed=0):
    """Measure how the classification score and p-value vary across seeds.

    Three kernels (DCE, DR, WL) are built from ``X``.  For each kernel a
    null distribution is computed by calling ``compute_svm_score_nestedCV``
    on ``iterations`` random permutations of ``y`` (the same permutations
    are reused for all three kernels).  Then, for each of ``reps`` seeds,
    the score on the true labels is computed with a ``RandomState`` built
    from that seed and compared against the null distribution.  Results
    are pickled into ``./results_dic`` and also returned.

    Parameters:
    ----------
    X:
        Input data handed to ``DCE_embedding``, ``DR_embedding`` and
        ``WL_K_embedding``.
    y:
        Labels; permuted with ``np.random.permutation`` for the null
        distribution and passed unpermuted for the observed scores.
    location: string
        Prefix of the pickle file names
        (``<location>_dce.pkl``, ``<location>_dre.pkl``,
        ``<location>_wl.pkl``).
    n_folds: int
        Forwarded to ``compute_svm_score_nestedCV``.
    iterations: int
        Number of label permutations in each null distribution.
    verbose: bool
        Whether to print progress messages and a p-value summary.
    reps: int
        Number of repetitions (one random seed each).
    seed: int
        Seed for NumPy's global random generator; it determines both the
        per-repetition seeds and the label permutations.

    Returns:
    -------
    accs_dce, pvals_dce, accs_dr, pvals_dr, accs_wl, pvals_wl:
        Arrays of length ``reps`` with the score and p-value of every
        repetition, per kernel.
    """
    if verbose:
        print('Computing embeddings...')

    # (summary label, pickle file suffix, kernel matrix), evaluated in
    # DCE -> DR -> WL order.
    # NOTE(review): the '_dre' suffix looks like a typo for '_dr' but is
    # kept so that existing result files / readers keep working.
    # NOTE(review): the WL output is passed through
    # compute_rbf_kernel_matrix here -- confirm that WL_K_embedding does
    # not already return a kernel matrix.
    kernels = [
        ('DCE', 'dce', compute_rbf_kernel_matrix(DCE_embedding(X))),
        ('DR ', 'dre', compute_rbf_kernel_matrix(DR_embedding(X))),
        ('WL ', 'wl',
         compute_rbf_kernel_matrix(WL_K_embedding(X, th=0.2))),
    ]

    # Seeding the global generator fixes both `seeds` and the permutations
    # drawn below.
    np.random.seed(seed)
    seeds = np.random.randint(0, 10000000, reps)

    accs = [np.zeros(reps) for _kernel in kernels]
    pvals = [np.zeros(reps) for _kernel in kernels]

    if verbose:
        print('Computing null distributions...')

    param_grid = [{'C': np.logspace(-5, 5, 25)}]
    permuted_labels = [np.random.permutation(y) for _it in range(iterations)]
    nulls = [_permutation_null_scores(K, permuted_labels, n_folds, param_grid)
             for _label, _suffix, K in kernels]

    for rep, s in enumerate(seeds):
        # One RandomState per repetition, consumed by the three kernels in
        # order (DCE, DR, WL).
        rs = np.random.RandomState(s)
        for k, (_label, _suffix, K) in enumerate(kernels):
            acc = compute_svm_score_nestedCV(K,
                                             y,
                                             n_folds,
                                             scoring=balanced_accuracy_scoring,
                                             random_state=rs,
                                             param_grid=param_grid)
            accs[k][rep] = acc
            pvals[k][rep] = _empirical_p_value(nulls[k], acc, iterations)

    # saving the results
    save_dir = './results_dic'
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    if verbose:
        print('Saving results at %s' % save_dir)

    for k, (_label, suffix, _K) in enumerate(kernels):
        _dump_results(os.path.join(save_dir, '%s_%s.pkl' % (location, suffix)),
                      nulls[k], accs[k], pvals[k])

    if verbose:
        for k, (label, _suffix, _K) in enumerate(kernels):
            print('%s p-values: min:%s, mean:%s, max:%s' % (
                label, np.min(pvals[k]), np.mean(pvals[k]), np.max(pvals[k])))

    return accs[0], pvals[0], accs[1], pvals[1], accs[2], pvals[2]
Пример #3
0
def experiment_1000_func_conn_data(data_path='data',
                                   location='all',
                                   n_folds=5,
                                   iterations=10000,
                                   verbose=True,
                                   plot=True,
                                   random_state=None):
    """
    Run the experiments on the 1000_functional_connectome data.

    For every selected location the data is loaded and
    ``simple_experiment`` is run on the RBF kernel of the DCE embedding
    and then on the RBF kernel of the DR embedding.

    Parameters:
    ----------
    data_path: string
        Path to the folder containing the dataset.
    location: string
        If location=='all' we run the experiments for all locations, otherwise
        only the selected location is used.
    n_folds: int
        The number of folds in a StratifiedKFold cross-validation
    iterations: int
        Number of iterations to compute the null distribution of
        balanced_accuracy and MMD^2_u
    verbose: bool
        Whether to print progress messages; also forwarded to the loader
        and to ``simple_experiment``.
    plot: bool
        Whether to plot the results of the statistical tests.
    random_state:
        Forwarded unchanged to every ``simple_experiment`` call.
    """
    if location != 'all':
        locs = [location]
    else:
        # Every entry of <data_path>/Functional_Connectomes/Locations is
        # treated as one dataset.
        locations_dir = os.path.join(data_path, 'Functional_Connectomes',
                                     'Locations')
        locs = os.listdir(locations_dir)
        if verbose:
            print("")
            print("We will analyze the following datasets:")
            print("%s \n" % '\n'.join(locs))

    # (banner, embedding function, data_name suffix), in execution order.
    stages = (
        ('### Results for DCE_Embedding ###', DCE_embedding, '_dce'),
        ('### Results for DR_Embedding ###', DR_embedding, '_dr'),
    )

    for name in locs:
        if verbose:
            print('')
            print('Working on %s dataset...' % name)
            print('-----------------------')

        X, y = load_1000_funct_connectome(data_path, name, verbose=verbose)

        for banner, embed, suffix in stages:
            if verbose:
                print('')
                print(banner)

            embedded = embed(X)
            kernel = compute_rbf_kernel_matrix(embedded)
            simple_experiment(kernel,
                              y,
                              n_folds=n_folds,
                              iterations=iterations,
                              verbose=verbose,
                              data_name=name + suffix,
                              plot=plot,
                              random_state=random_state)
Пример #4
0
def experiment_schizophrenia_data(data_path='data', n_folds=5,
                                  iterations=10000,
                                  verbose=True, plot=True, random_state=None):
    """Run the experiments on the Schizophrenia dataset.

    Loads the data once, then evaluates four kernels with
    ``simple_experiment``: the RBF kernels of the DCE and DR embeddings,
    followed by the outputs of ``WL_K_embedding`` and ``SP_K_embedding``
    (both used as kernel matrices as returned).

    Parameters:
    ----------
    data_path: string
        Path to the folder containing the dataset.
    n_folds: int
        The number of folds in a StratifiedKFold cross-validation
    iterations: int
        Number of iterations to compute the null distribution of
        balanced_accuracy and MMD^2_u
    verbose: bool
        Whether to print progress messages; also forwarded to the data
        loader and to ``simple_experiment``.
    plot: bool
        Whether to plot the results of the statistical tests.
    random_state:
        Forwarded unchanged to every ``simple_experiment`` call.

    """
    name = 'Schizophrenia'
    if verbose:
        # print(...) with one argument is valid and equivalent on both
        # Python 2 and Python 3.
        print('\nWorking on %s dataset...' % name)
        print('-----------------------')
    X, y = load_schizophrenia_data(data_path, verbose=verbose)

    def announce(label):
        # Section banner, emitted before the corresponding kernel is built.
        if verbose:
            print('\n### Results for %s ###' % label)

    def evaluate(kernel, suffix):
        # All four experiments share every option except kernel and name.
        simple_experiment(kernel, y, n_folds=n_folds, iterations=iterations,
                          verbose=verbose, data_name=name + suffix,
                          plot=plot, random_state=random_state)

    # DCE Embedding
    announce('DCE_Embedding')
    evaluate(compute_rbf_kernel_matrix(DCE_embedding(X)), '_dce')

    # DR Embedding
    announce('DR_Embedding')
    evaluate(compute_rbf_kernel_matrix(DR_embedding(X)), '_dr')

    # Second positional argument of the WL / SP helpers (presumably a
    # threshold -- confirm against those helpers).
    th = 0.2

    # WL Kernel based Embedding
    announce('WL_K_Embedding')
    evaluate(WL_K_embedding(X, th), '_wl')

    # SP Kernel based Embedding
    announce('SP_K_Embedding')
    evaluate(SP_K_embedding(X, th), '_sp')
Пример #5
0
def check_instability_classification(X, y, location='', n_folds=5,
                                     iterations=10000, verbose=True, reps=100,
                                     seed=0):
    """Repeat the classification test over many seeds and save the results.

    Builds three kernels from ``X`` (DCE, DR, WL), computes for each a
    null distribution of ``compute_svm_score_nestedCV`` scores over
    ``iterations`` shared permutations of ``y``, and then scores the true
    labels ``reps`` times, each time with a ``RandomState`` built from a
    different seed.  Every observed score is turned into a p-value
    against the matching null distribution.  One pickle per kernel is
    written to ``./results_dic``.

    Parameters:
    ----------
    X:
        Input data handed to the three embedding functions.
    y:
        Labels (permuted for the null distribution, unpermuted otherwise).
    location: string
        Prefix used for the pickle file names.
    n_folds: int
        Forwarded to ``compute_svm_score_nestedCV``.
    iterations: int
        Number of label permutations in each null distribution.
    verbose: bool
        Whether to print progress messages and a p-value summary.
    reps: int
        Number of repetitions.
    seed: int
        Seed for NumPy's global generator, from which the per-repetition
        seeds and the permutations are drawn.

    Returns:
    -------
    accs_dce, pvals_dce, accs_dr, pvals_dr, accs_wl, pvals_wl:
        Arrays of length ``reps``.
    """
    if verbose:
        print('Computing embeddings...')

    X_dce = DCE_embedding(X)
    K_dce = compute_rbf_kernel_matrix(X_dce)
    X_dr = DR_embedding(X)
    K_dr = compute_rbf_kernel_matrix(X_dr)
    # NOTE(review): an RBF kernel is computed on top of the WL output --
    # confirm that WL_K_embedding does not already return a kernel matrix.
    X_wl = WL_K_embedding(X, th=0.2)
    K_wl = compute_rbf_kernel_matrix(X_wl)

    np.random.seed(seed)
    seeds = np.random.randint(0, 10000000, reps)

    accs_dce, accs_dr, accs_wl = np.zeros(reps), np.zeros(reps), np.zeros(reps)
    pvals_dce, pvals_dr, pvals_wl = (np.zeros(reps), np.zeros(reps),
                                     np.zeros(reps))

    if verbose:
        print('Computing null distributions...')

    param_grid = [{'C': np.logspace(-5, 5, 25)}]
    yis = [np.random.permutation(y) for _it in range(iterations)]

    def null_scores(K):
        # Parallel(...) returns a plain list; convert it to an array so the
        # `null > acc` comparison below is element-wise whatever the scalar
        # type of `acc` is.
        return np.asarray(Parallel(n_jobs=-1)(
            delayed(compute_svm_score_nestedCV)(
                K, y_perm, n_folds, scoring=balanced_accuracy_scoring,
                param_grid=param_grid)
            for y_perm in yis))

    acc_null_dce = null_scores(K_dce)
    acc_null_dr = null_scores(K_dr)
    acc_null_wl = null_scores(K_wl)

    # Smallest p-value that can be reported with `iterations` permutations.
    p_floor = 1.0 / iterations
    runs = (
        (K_dce, acc_null_dce, accs_dce, pvals_dce),
        (K_dr, acc_null_dr, accs_dr, pvals_dr),
        (K_wl, acc_null_wl, accs_wl, pvals_wl),
    )

    for i, s in enumerate(seeds):
        # The same RandomState is consumed by DCE, DR and WL in that order.
        rs = np.random.RandomState(s)
        for K, null, accs, pvals in runs:
            acc = compute_svm_score_nestedCV(K, y, n_folds,
                                             scoring=balanced_accuracy_scoring,
                                             random_state=rs,
                                             param_grid=param_grid)
            accs[i] = acc
            pvals[i] = max(p_floor, (null > acc).sum() / float(iterations))

    # saving the results
    save_dir = './results_dic'
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    if verbose:
        print('Saving results at %s' % save_dir)

    # NOTE(review): '_dre' looks like a typo for '_dr'; kept unchanged so
    # that existing result files and their readers stay compatible.
    outputs = (
        ('%s_dce.pkl' % location, acc_null_dce, accs_dce, pvals_dce),
        ('%s_dre.pkl' % location, acc_null_dr, accs_dr, pvals_dr),
        ('%s_wl.pkl' % location, acc_null_wl, accs_wl, pvals_wl),
    )
    for filename, null, accs, pvals in outputs:
        res = {}
        res['null_distribution'] = np.array(null)
        res['accuracies'] = accs
        res['p_values'] = pvals
        # `with` guarantees the file is flushed and closed.
        with open(os.path.join(save_dir, filename), 'wb') as out:
            pickle.dump(res, out)

    if verbose:
        print('DCE p-values: min:%s, mean:%s, max:%s' % (np.min(pvals_dce),
                                                         np.mean(pvals_dce),
                                                         np.max(pvals_dce)))
        print('DR  p-values: min:%s, mean:%s, max:%s' % (np.min(pvals_dr),
                                                         np.mean(pvals_dr),
                                                         np.max(pvals_dr)))
        print('WL  p-values: min:%s, mean:%s, max:%s' % (np.min(pvals_wl),
                                                         np.mean(pvals_wl),
                                                         np.max(pvals_wl)))

    return accs_dce, pvals_dce, accs_dr, pvals_dr, accs_wl, pvals_wl
Пример #6
0
def experiment_1000_func_conn_data(data_path='data', location='all', n_folds=5,
                                   iterations=10000, verbose=True, plot=True,
                                   random_state=None):
    """
    Run the experiments on the 1000_functional_connectome data.

    Each selected location is loaded in turn; its DCE embedding and then
    its DR embedding are converted to RBF kernels and handed to
    ``simple_experiment``.

    Parameters:
    ----------
    data_path: string
        Path to the folder containing the dataset.
    location: string
        If location=='all' we run the experiments for all locations, otherwise
        only the selected location is used.
    n_folds: int
        The number of folds in a StratifiedKFold cross-validation
    iterations: int
        Number of iterations to compute the null distribution of
        balanced_accuracy and MMD^2_u
    verbose: bool
        Whether to print progress messages; also forwarded to the loader
        and to ``simple_experiment``.
    plot: bool
        Whether to plot the results of the statistical tests.
    random_state:
        Forwarded unchanged to every ``simple_experiment`` call.
    """
    def say(*lines):
        # Print each line only when verbose output was requested.
        if verbose:
            for line in lines:
                print(line)

    def run_stage(banner, embed, X, y, data_name):
        # Banner first, then embedding -> RBF kernel -> experiment.
        say('', banner)
        kernel = compute_rbf_kernel_matrix(embed(X))
        simple_experiment(kernel, y, n_folds=n_folds, iterations=iterations,
                          verbose=verbose, data_name=data_name, plot=plot,
                          random_state=random_state)

    if location == 'all':
        # One dataset per entry of the Locations directory.
        locs = os.listdir(os.path.join(data_path, 'Functional_Connectomes',
                                       'Locations'))
        say("",
            "We will analyze the following datasets:",
            "%s \n" % '\n'.join(locs))
    else:
        locs = [location]

    for name in locs:
        say('',
            'Working on %s dataset...' % name,
            '-----------------------')

        X, y = load_1000_funct_connectome(data_path, name, verbose=verbose)

        # DCE Embedding
        run_stage('### Results for DCE_Embedding ###', DCE_embedding,
                  X, y, name + '_dce')

        # DR Embedding
        run_stage('### Results for DR_Embedding ###', DR_embedding,
                  X, y, name + '_dr')