Example #1
def load_globals(config):
    import numpy as np
    import mapreduce as GLOBAL  # access to global variables
    import parsimony.functions.nesterov.tv as tv_helper
    GLOBAL.DATA = GLOBAL.load_data(config["data"])
    import brainomics.mesh_processing as mesh_utils
    mesh_coord, mesh_triangles = mesh_utils.mesh_arrays(
        config["structure"]["mesh"])
    mask = np.load(config["structure"]["mask"])
    GLOBAL.mesh_coord, GLOBAL.mesh_triangles, GLOBAL.mask = mesh_coord, mesh_triangles, mask
    A, _ = tv_helper.nesterov_linear_operator_from_mesh(
        GLOBAL.mesh_coord, GLOBAL.mesh_triangles, GLOBAL.mask)
    GLOBAL.A = A
    GLOBAL.CONFIG = config
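`load_globals` above expects a JSON config whose "data" entry feeds `GLOBAL.load_data` and whose "structure" entry points at the fused mesh and its vertex mask. A minimal sketch of such a config (file names are illustrative assumptions; Example #9 shows a full config being written):

config = {
    "data": {"X": "X.npy", "y": "y.npy"},                       # consumed by GLOBAL.load_data
    "structure": {"mesh": "lrh.pial.gii", "mask": "mask.npy"},  # mesh + vertex mask
}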
Example #2
def load_globals(config):
    import os
    import numpy as np
    import mapreduce as GLOBAL  # access to global variables
    import parsimony.functions.nesterov.tv as tv_helper
    import brainomics.mesh_processing as mesh_utils
    GLOBAL.DATA = GLOBAL.load_data(config["data"])
    STRUCTURE = np.load(config["structure"])
    A = tv_helper.A_from_mask(STRUCTURE)
    N_COMP = config["N_COMP"]
    GLOBAL.A, GLOBAL.STRUCTURE, GLOBAL.N_COMP = A, STRUCTURE, N_COMP

    # TEMPLATE_PATH and INPUT_BASE_DIR are assumed to be defined at module level
    mesh_coord, mesh_triangles = mesh_utils.mesh_arrays(os.path.join(TEMPLATE_PATH, "lrh.pial.gii"))
    mask = np.load(os.path.join(INPUT_BASE_DIR, "mask.npy"))
    Atv = tv_helper.linear_operator_from_mesh(mesh_coord, mesh_triangles, mask=mask)
    GLOBAL.Atv = Atv
    GLOBAL.FULL_RESAMPLE = config['full_resample']
Example #3
import os
import shutil

import numpy as np
import pandas as pd

BASE_PATH = "/neurospin/brainomics/2016_schizConnect/analysis/all_studies+VIP/Freesurfer/all_subjects_less_than_30years"
TEMPLATE_PATH = os.path.join(BASE_PATH, "freesurfer_template")
INPUT_CSV = os.path.join(BASE_PATH, "population_30yo.csv")
OUTPUT = os.path.join(BASE_PATH, "data")

# Read pop csv
pop = pd.read_csv(INPUT_CSV)
np.save(os.path.join(OUTPUT, "site.npy"), pop["site_num"].values)
#############################################################################
## Build mesh template
import brainomics.mesh_processing as mesh_utils
cor_l, tri_l = mesh_utils.mesh_arrays(
    os.path.join(TEMPLATE_PATH, "lh.pial.gii"))
cor_r, tri_r = mesh_utils.mesh_arrays(
    os.path.join(TEMPLATE_PATH, "rh.pial.gii"))
cor = np.vstack([cor_l, cor_r])
tri_r += cor_l.shape[0]  # offset right-hemisphere triangle indices past the left vertices
tri = np.vstack([tri_l, tri_r])
mesh_utils.mesh_from_arrays(cor,
                            tri,
                            path=os.path.join(TEMPLATE_PATH, "lrh.pial.gii"))
shutil.copyfile(os.path.join(TEMPLATE_PATH, "lrh.pial.gii"),
                os.path.join(OUTPUT, "lrh.pial.gii"))

#############################################################################
# Read images
n = len(pop)
assert n == 280
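The `tri_r += cor_l.shape[0]` line above is the key step when fusing the two hemisphere meshes: triangles index into their own vertex array, so once the vertex arrays are stacked, right-hemisphere indices must be shifted by the number of left vertices. A toy check (made-up 3-vertex meshes, not project data):

import numpy as np

cor_l = np.zeros((3, 3))       # 3 left vertices (coordinates irrelevant here)
cor_r = np.ones((3, 3))        # 3 right vertices
tri_l = np.array([[0, 1, 2]])  # one triangle per hemisphere
tri_r = np.array([[0, 1, 2]])

cor = np.vstack([cor_l, cor_r])
tri = np.vstack([tri_l, tri_r + cor_l.shape[0]])
assert tri.max() == cor.shape[0] - 1  # the right triangle now points at rows 3..5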
Example #4
# enettv
import os
import shutil

import numpy as np

import brainomics.mesh_processing as mesh_utils
from brainomics import array_utils

# TEMPLATE_PATH is assumed to be defined as in the other examples
penalty_start = 2
MASK_PATH = "/neurospin/brainomics/2016_icaar-eugei/results/Freesurfer/ICAAR+EUGEI/data/mask.npy"
OUTPUT = "/neurospin/brainomics/2016_icaar-eugei/results/Freesurfer/ICAAR+EUGEI/enettv/model_selection_5folds/0.5_0.56_0.24_0.2"
beta = np.load(os.path.join(OUTPUT, "beta.npz"))['arr_0']
# keep only the components carrying 99% of beta's l2 norm
beta, _ = array_utils.arr_threshold_from_norm2_ratio(beta, 0.99)
####################################################################################

shutil.copyfile(os.path.join(TEMPLATE_PATH, "lh.pial.gii"),
                os.path.join(OUTPUT, "lh.pial.gii"))
shutil.copyfile(os.path.join(TEMPLATE_PATH, "rh.pial.gii"),
                os.path.join(OUTPUT, "rh.pial.gii"))
shutil.copyfile(os.path.join(TEMPLATE_PATH, "lrh.pial.gii"),
                os.path.join(OUTPUT, "lrh.pial.gii"))

cor_l, tri_l = mesh_utils.mesh_arrays(os.path.join(OUTPUT, "lh.pial.gii"))
cor_r, tri_r = mesh_utils.mesh_arrays(os.path.join(OUTPUT, "rh.pial.gii"))
assert cor_l.shape[0] == cor_r.shape[0]

cor_both, tri_both = mesh_utils.mesh_arrays(
    os.path.join(OUTPUT, "lrh.pial.gii"))
mask__mesh = np.load(MASK_PATH)
assert (mask__mesh.shape[0] == cor_both.shape[0]
        == cor_l.shape[0] * 2 == cor_l.shape[0] + cor_r.shape[0])
assert mask__mesh.sum() > 0  # mask must select at least one vertex

# Find the mapping from components in masked mesh to left_mesh and right_mesh
# concat was initialy: cor = np.vstack([cor_l, cor_r])
mask_left__mesh = np.arange(mask__mesh.shape[0]) < mask__mesh.shape[0] / 2
mask_left__mesh[np.logical_not(mask__mesh)] = False
mask_right__mesh = np.arange(mask__mesh.shape[0]) >= mask__mesh.shape[0] / 2
mask_right__mesh[np.logical_not(mask__mesh)] = False
assert mask__mesh.sum() == mask_left__mesh.sum() + mask_right__mesh.sum()
Example #5
import os
import json
import shutil

import numpy as np

from brainomics import array_utils
import brainomics.mesh_processing as mesh_utils

BASE_PATH = "/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/Freesurfer/results_30yo"
MASK_PATH = "/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/Freesurfer/data/30yo/mask.npy"
TEMPLATE_PATH = "/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/Freesurfer/freesurfer_template"
OUTPUT = "/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/Freesurfer/results_30yo/enettv/weight_map"

shutil.copyfile(os.path.join(TEMPLATE_PATH, "lh.pial.gii"),
                os.path.join(OUTPUT, "lh.pial.gii"))
shutil.copyfile(os.path.join(TEMPLATE_PATH, "rh.pial.gii"),
                os.path.join(OUTPUT, "rh.pial.gii"))

cor_l, tri_l = mesh_utils.mesh_arrays(os.path.join(OUTPUT, "lh.pial.gii"))
cor_r, tri_r = mesh_utils.mesh_arrays(os.path.join(OUTPUT, "rh.pial.gii"))
assert cor_l.shape[0] == cor_r.shape[0]

cor_both, tri_both = mesh_utils.mesh_arrays(
    os.path.join(OUTPUT, "lrh.pial.gii"))
mask__mesh = np.load(MASK_PATH)
assert (mask__mesh.shape[0] == cor_both.shape[0]
        == cor_l.shape[0] * 2 == cor_l.shape[0] + cor_r.shape[0])
assert mask__mesh.sum() > 0  # mask must select at least one vertex

# Find the mapping from components in masked mesh to left_mesh and right_mesh
# concat was initialy: cor = np.vstack([cor_l, cor_r])
mask_left__mesh = np.arange(mask__mesh.shape[0]) < mask__mesh.shape[0] / 2
mask_left__mesh[np.logical_not(mask__mesh)] = False
mask_right__mesh = np.arange(mask__mesh.shape[0]) >= mask__mesh.shape[0] / 2
mask_right__mesh[np.logical_not(mask__mesh)] = False
assert mask__mesh.sum() == mask_left__mesh.sum() + mask_right__mesh.sum()
Example #6
"""
mris_convert /i2bm/local/freesurfer/subjects/fsaverage/surf/rh.pial ./rh.pial.gii
"""
import os
import numpy as np
import scipy.sparse as sparse

BASE_PATH = "/neurospin/brainomics/2013_adni/"
TEMPLATE_PATH = os.path.join(BASE_PATH, "freesurfer_template")
OUTPUT = os.path.join(BASE_PATH, "MCIc-CTL-FS")

import brainomics.mesh_processing as mesh_utils
mesh_coord, mesh_triangles = mesh_utils.mesh_arrays(
    os.path.join(TEMPLATE_PATH, "lrh.pial.gii"))

# params

mask = np.load(os.path.join(OUTPUT, "mask.npy"))

import parsimony.functions.nesterov.tv as tv_helper
A, _ = tv_helper.nesterov_linear_operator_from_mesh(mesh_coord,
                                                    mesh_triangles,
                                                    mask=mask)
"""
# count neighbors (arrity) for each node
n_neighbors = np.array([len(n) for n in nodes_with_edges])
print np.sum(n_neighbors)
print np.sum(n_neighbors) / float(len(nodes_with_edges))
print [[n, np.sum(n_neighbors == n)] for n in np.unique(n_neighbors)]
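`nodes_with_edges` is not defined in this excerpt. A plausible construction (an assumption, not the original code) derives each node's neighbor set from the mesh triangles:

import numpy as np

def neighbors_from_triangles(n_nodes, triangles):
    """For each node, the set of nodes sharing an edge with it."""
    nodes_with_edges = [set() for _ in range(n_nodes)]
    for i, j, k in triangles:
        nodes_with_edges[i].update((j, k))
        nodes_with_edges[j].update((i, k))
        nodes_with_edges[k].update((i, j))
    return nodes_with_edges

# toy tetrahedron: every node neighbors the other three
tri_toy = np.array([[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]])
assert [len(s) for s in neighbors_from_triangles(4, tri_toy)] == [3, 3, 3, 3]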
Example #7
import os
import shutil

import numpy as np
import pandas as pd

BASE_PATH = "/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST"
INPUT_FS = "/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/Freesurfer/data/freesurfer_assembled_data_fsaverage"
TEMPLATE_PATH = os.path.join(BASE_PATH, "Freesurfer", "freesurfer_template")

INPUT_CSV = os.path.join(BASE_PATH, "Freesurfer", "population_50yo.csv")
OUTPUT = os.path.join(BASE_PATH, "Freesurfer", "data", "50yo")
penalty_start = 2

# Read pop csv
pop = pd.read_csv(INPUT_CSV)

#############################################################################
## Build mesh template
import brainomics.mesh_processing as mesh_utils
cor_l, tri_l = mesh_utils.mesh_arrays(
    os.path.join(TEMPLATE_PATH, "lh.pial.gii"))
cor_r, tri_r = mesh_utils.mesh_arrays(
    os.path.join(TEMPLATE_PATH, "rh.pial.gii"))
cor = np.vstack([cor_l, cor_r])
tri_r += cor_l.shape[0]
tri = np.vstack([tri_l, tri_r])
mesh_utils.mesh_from_arrays(cor,
                            tri,
                            path=os.path.join(TEMPLATE_PATH, "lrh.pial.gii"))
shutil.copyfile(os.path.join(TEMPLATE_PATH, "lrh.pial.gii"),
                os.path.join(OUTPUT, "lrh.pial.gii"))

#############################################################################
# Read images
n = len(pop)
assert n == 226
Example #8
import os
import json
import shutil

import numpy as np

import brainomics.mesh_processing as mesh_utils

# BASE_PATH and OUTPUT are assumed to be defined as in the other examples
if not os.path.exists(OUTPUT):
    os.mkdir(OUTPUT)


TEMPLATE_PATH = os.path.join(BASE_PATH, "freesurfer_template")
shutil.copyfile(os.path.join(TEMPLATE_PATH, "lh.pial.gii"), os.path.join(OUTPUT, "lh.pial.gii"))
shutil.copyfile(os.path.join(TEMPLATE_PATH, "rh.pial.gii"), os.path.join(OUTPUT, "rh.pial.gii"))


config = json.load(open("config_5cv.json"))

#from soma import aims
#os.path.join(OUTPUT, "lh.pial.gii")
#mesh = aims.read(os.path.join(OUTPUT, "lh.pial.gii"))
#mesh.header()

cor_l, tri_l = mesh_utils.mesh_arrays(os.path.join(OUTPUT, "lh.pial.gii"))
cor_r, tri_r = mesh_utils.mesh_arrays(os.path.join(OUTPUT, "rh.pial.gii"))
assert cor_l.shape[0] == cor_r.shape[0] == 163842

cor_both, tri_both = mesh_utils.mesh_arrays(config["structure"]["mesh"])
mask__mesh = np.load(config["structure"]["mask"])
assert mask__mesh.shape[0] == cor_both.shape[0] == cor_l.shape[0] * 2 == cor_l.shape[0] + cor_r.shape[0]
assert (mask__mesh.shape[0], mask__mesh.sum()) == (327684, 317089)

# Find the mapping from beta in masked mesh to left_mesh and right_mesh
# concat was initialy: cor = np.vstack([cor_l, cor_r])
mask_left__mesh = np.arange(mask__mesh.shape[0]) < mask__mesh.shape[0] / 2
mask_left__mesh[np.logical_not(mask__mesh)] = False
mask_right__mesh = np.arange(mask__mesh.shape[0]) >= mask__mesh.shape[0] / 2
mask_right__mesh[np.logical_not(mask__mesh)] = False
assert mask__mesh.sum() == (mask_left__mesh.sum() + mask_right__mesh.sum())
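Examples #4, #5 and #8 build `mask_left__mesh`/`mask_right__mesh` so that a weight vector estimated on the masked mesh can be scattered back onto full-resolution hemisphere textures. A minimal sketch of that projection with toy stand-ins (sizes are illustrative, not project data):

import numpy as np

mask__mesh = np.array([1, 1, 0, 1, 0, 1, 1, 0], dtype=bool)  # toy 8-vertex mesh
beta = np.arange(1., mask__mesh.sum() + 1)  # one coefficient per masked vertex

tex = np.zeros(mask__mesh.shape[0])  # full-mesh texture, zeros outside the mask
tex[mask__mesh] = beta

half = mask__mesh.shape[0] // 2      # concat order was [cor_l, cor_r]
tex_left, tex_right = tex[:half], tex[half:]
assert tex_left.shape == tex_right.shape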
Example #9
def init():
    # Assumed module-level context (not shown in this excerpt): os, shutil, json,
    # itertools, numpy as np, sklearn.model_selection.StratifiedKFold, plus WD,
    # WD_ORIGINAL, WD_CLUSTER, NFOLDS_OUTER, NFOLDS_INNER, penalty_start and
    # user_func_filename.
    INPUT_DATA_X = os.path.join(WD_ORIGINAL, 'X.npy')
    INPUT_DATA_y = os.path.join(WD_ORIGINAL, 'y.npy')
    INPUT_MASK_PATH = os.path.join(WD_ORIGINAL, 'mask.npy')
    INPUT_MESH_PATH = '/neurospin/brainomics/2013_adni/MCIc-CTL-FS_cs/lrh.pial.gii'
    #INPUT_LINEAR_OPE_PATH = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/Freesurfer/data/30yo/Atv.npz'
    # INPUT_CSV = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/Freesurfer/population_30yo.csv'

    os.makedirs(WD, exist_ok=True)
    shutil.copy(INPUT_DATA_X, WD)
    shutil.copy(INPUT_DATA_y, WD)
    shutil.copy(INPUT_MASK_PATH, WD)
    shutil.copy(INPUT_MESH_PATH, WD)

    #shutil.copy(INPUT_LINEAR_OPE_PATH, WD)

    ## Create config file
    os.chdir(WD)
    X = np.load("X.npy")
    y = np.load("y.npy")

    if not os.path.exists(os.path.join(WD, "Atv.npz")):
        import brainomics.mesh_processing as mesh_utils
        cor, tri = mesh_utils.mesh_arrays(os.path.join(WD, "lrh.pial.gii"))
        mask = np.load(os.path.join(WD, 'mask.npy'))

        import parsimony.functions.nesterov.tv as nesterov_tv
        from parsimony.utils.linalgs import LinearOperatorNesterov
        Atv = nesterov_tv.linear_operator_from_mesh(cor, tri, mask, calc_lambda_max=True)
        Atv.save(os.path.join(WD, "Atv.npz"))
        Atv_ = LinearOperatorNesterov(filename=os.path.join(WD, "Atv.npz"))
        assert Atv.get_singular_values(0) == Atv_.get_singular_values(0)
        assert np.allclose(Atv_.get_singular_values(0), 8.999, rtol=1e-03, atol=1e-03)
        assert np.all([a.shape == (317089, 317089) for a in Atv])

    if not os.path.exists(os.path.join(WD, "beta_start.npz")):
        betas = dict()
        import time
        import parsimony.estimators as estimators
        alphas = [.01, 0.1, 1.0, 10]
        for alpha in alphas:
            mod = estimators.RidgeLogisticRegression(l=alpha, class_weight="auto", penalty_start=penalty_start)
            t_ = time.time()
            mod.fit(X, y.ravel())
            print(time.time() - t_) # 11564
            betas["lambda_%.2f" % alpha] = mod.beta

        np.savez(os.path.join(WD, "beta_start.npz"), **betas)
        beta_start = np.load(os.path.join(WD, "beta_start.npz"))
        assert np.all([np.all(beta_start[a] == betas[a]) for a in beta_start.keys()])

    ## Create config file

    #  ########################################################################
    #  Setting 1: 5cv + large range of parameters: cv_largerange
    #  with sub-sample training set with size 50, 100
    # 5cv/cv0*[_sub50]/refit/*

    # sub_sizes = [50, 100]
    sub_sizes = []

    # note: StratifiedKFold's random_state only has an effect when shuffle=True
    cv_outer = [[tr, te] for tr, te in
                StratifiedKFold(n_splits=NFOLDS_OUTER, random_state=42).split(np.zeros(y.shape[0]), y.ravel())]

    # check we get the same CV as previously
    cv_old = json.load(open(os.path.join(WD_ORIGINAL, "config_modselectcv.json")))["resample"]
    cv_outer_old = [cv_old[k] for k in ['cv%02d/refit' % i for i in range(NFOLDS_OUTER)]]
    assert np.all([np.all(np.array(cv_outer_old[i][0]) == cv_outer[i][0]) for i in range(NFOLDS_OUTER)])
    assert np.all([np.all(np.array(cv_outer_old[i][1]) == cv_outer[i][1]) for i in range(NFOLDS_OUTER)])
    # check END

    import collections
    cv = collections.OrderedDict()

    cv["refit/refit"] = [np.arange(len(y)), np.arange(len(y))]

    for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
        # Simple CV
        cv["cv%02d/refit" % (cv_outer_i)] = [tr_val, te]

        # Nested CV
        # cv_inner = StratifiedKFold(y[tr_val].ravel(), n_folds=NFOLDS_INNER, random_state=42)
        # for cv_inner_i, (tr, val) in enumerate(cv_inner):
        #     cv["cv%02d/cvnested%02d" % ((cv_outer_i), cv_inner_i)] = [tr_val[tr], tr_val[val]]

        # Sub-sample training set with size 50, 100
        # => cv*_sub[50|100]/refit
        grps = np.unique(y[tr_val]).astype(int)
        ytr = y.copy()
        ytr[te] = np.nan
        g_idx = [np.where(ytr == g)[0] for g in grps]
        assert np.all([np.all(ytr[g_idx[g]] == g) for g in grps])

        g_size = np.array([len(g) for g in g_idx])
        g_prop = g_size / g_size.sum()

        for sub_size in sub_sizes:
            # sub_size = sub_sizes[0]
            sub_g_size = np.round(g_prop * sub_size).astype(int)
            g_sub_idx = [np.random.choice(g_idx[g], sub_g_size[g], replace=False) for g in grps]
            assert np.all([np.all(y[g_sub_idx[g]] == g) for g in grps])
            tr_val_sub = np.concatenate(g_sub_idx)
            assert len(tr_val_sub) == sub_size
            assert np.all([idx in tr_val for idx in tr_val_sub])
            assert np.all(np.logical_not([idx in te for idx in tr_val_sub]))
            cv["cv%02d_sub%i/refit" % (cv_outer_i, sub_size)] = [tr_val_sub, te]

    cv = {k: [cv[k][0].tolist(), cv[k][1].tolist()] for k in cv}

    # Nested CV
    # assert len(cv_largerange) == NFOLDS_OUTER * NFOLDS_INNER + NFOLDS_OUTER + 1

    # Simple CV
    # assert len(cv) == NFOLDS_OUTER + 1

    # Simple CV + sub-sample training set with size 50, 100:
    assert len(cv) == NFOLDS_OUTER * (1 + len(sub_sizes)) + 1
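    # with NFOLDS_OUTER = 5 and sub_sizes = [], len(cv) = 5 * 1 + 1 = 6 resamples,
    # consistent with the 252 * 6 = 1512 jobs asserted below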

    print(list(cv.keys()))

    # Large grid of parameters
    alphas = [0.001, 0.01, 0.1, 1.0]
    # alphas = [.01, 0.1, 1.0] # first ran with this grid
    tv_ratio = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    l1l2_ratio = [0.1, 0.5, 0.9]
    # l1l2_ratio = [0, 0.1, 0.5, 0.9, 1.0] # first ran with this grid
    algos = ["enettv", "enetgn"]
    params_enet_tvgn = [list(param) for param in itertools.product(algos, alphas, l1l2_ratio, tv_ratio)]
    assert len(params_enet_tvgn) == 240 # old 300

    params_enet = [list(param) for param in itertools.product(["enet"], alphas, l1l2_ratio, [0])]
    assert len(params_enet) == 12  # old 15

    params = params_enet_tvgn + params_enet
    assert len(params) == 252 # 315
    # Simple CV
    # assert len(params) * len(cv) == 1890

    # Simple CV + sub-sample training set with size 50, 100:
    assert len(params) * len(cv) == 1512 # 5040

    config = dict(data=dict(X="X.npy", y="y.npy"),
                  params=params, resample=cv,
                  structure_linear_operator_tv="Atv.npz",
                  beta_start="beta_start.npz",
                  map_output="5cv",
                  user_func=user_func_filename)
    json.dump(config, open(os.path.join(WD, "config_cv_largerange.json"), "w"))


    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    cmd = "mapreduce.py --map  %s/config_cv_largerange.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd,walltime = "250:00:00",
                                            suffix="_cv_largerange",
                                            freecores=2)

    #  ########################################################################
    #  Setting 2: dcv + reduced range of parameters: dcv_reducedrange
    #  5cv/cv0*/cvnested0*/*

    cv_outer = [[tr, te] for tr, te in
                StratifiedKFold(n_splits=NFOLDS_OUTER, random_state=42).split(np.zeros(y.shape[0]), y.ravel())]

    # check we get the same CV as previously
    cv_old = json.load(open(os.path.join(WD_ORIGINAL, "config_modselectcv.json")))["resample"]
    cv_outer_old = [cv_old[k] for k in ['cv%02d/refit' % i for i in range(NFOLDS_OUTER)]]
    assert np.all([np.all(np.array(cv_outer_old[i][0]) == cv_outer[i][0]) for i in range(NFOLDS_OUTER)])
    assert np.all([np.all(np.array(cv_outer_old[i][1]) == cv_outer[i][1]) for i in range(NFOLDS_OUTER)])
    # check END

    import collections
    cv = collections.OrderedDict()
    cv["refit/refit"] = [np.arange(len(y)), np.arange(len(y))]

    for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
        cv["cv%02d/refit" % (cv_outer_i)] = [tr_val, te]
        cv_inner = StratifiedKFold(n_splits=NFOLDS_INNER, random_state=42).split(np.zeros(y[tr_val].shape[0]), y[tr_val].ravel())
        for cv_inner_i, (tr, val) in enumerate(cv_inner):
            cv["cv%02d/cvnested%02d" % ((cv_outer_i), cv_inner_i)] = [tr_val[tr], tr_val[val]]

    cv = {k: [cv[k][0].tolist(), cv[k][1].tolist()] for k in cv}
    #assert len(cv) == NFOLDS_OUTER + 1
    assert len(cv) == NFOLDS_OUTER * NFOLDS_INNER + NFOLDS_OUTER + 1
    print(list(cv.keys()))

    # Reduced grid of parameters
    alphas = [0.001, 0.01, 0.1, 1.0]
    # alphas = [.01, 0.1] # original
    tv_ratio = [0.2, 0.8]
    l1l2_ratio = [0.1, 0.9]
    algos = ["enettv", "enetgn"]
    params_enet_tvgn = [list(param) for param in itertools.product(algos, alphas, l1l2_ratio, tv_ratio)]
    assert len(params_enet_tvgn) == 32 # 16

    params_enet = [list(param) for param in itertools.product(["enet"], alphas, l1l2_ratio, [0])]
    assert len(params_enet) == 8 # 4

    params = params_enet_tvgn + params_enet
    assert len(params) == 40 # 20
    assert len(params) * len(cv) == 1240 # 620
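    # with NFOLDS_OUTER = NFOLDS_INNER = 5, len(cv) = 5 * 5 + 5 + 1 = 31 resamples,
    # hence the 40 * 31 = 1240 jobs asserted above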

    config = dict(data=dict(X="X.npy", y="y.npy"),
                  params=params, resample=cv,
                  structure_linear_operator_tv="Atv.npz",
                  beta_start="beta_start.npz",
                  map_output="5cv",
                  user_func=user_func_filename)
    json.dump(config, open(os.path.join(WD, "config_dcv_reducedrange.json"), "w"))

    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    cmd = "mapreduce.py --map  %s/config_dcv_reducedrange.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd,walltime = "250:00:00",
                                            suffix="_dcv_reducedrange",
                                            freecores=2)
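For context, the config files written by `init` are consumed by a mapreduce user_func much like Example #1's `load_globals`. A minimal sketch (an assumption inferred from the config keys above, not the original user_func):

def load_globals(config):
    import numpy as np
    import mapreduce as GLOBAL  # access to global variables
    from parsimony.utils.linalgs import LinearOperatorNesterov
    GLOBAL.DATA = GLOBAL.load_data(config["data"])  # {"X": "X.npy", "y": "y.npy"}
    GLOBAL.Atv = LinearOperatorNesterov(filename=config["structure_linear_operator_tv"])
    GLOBAL.BETA_START = np.load(config["beta_start"])  # warm starts keyed by lambda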