示例#1
0
def one_edge_tv(coord_ij, tria_ij, data_ij, Y_ij, params):
    """Fit an L1 + L2 + TV penalised linear regression and score it.

    The TV linear operator is built from the mesh, the samples are split
    into a train and a test set (33% test, shuffled, ``random_state=1``),
    the model is fitted on the train set, and the MSE / R2 of both sets
    are printed.

    Parameters
    ----------
    coord_ij, tria_ij
        Mesh description forwarded to ``tv.linear_operator_from_mesh``.
    data_ij
        Predictors, split by ``train_test_split``.
    Y_ij
        Targets, split alongside ``data_ij``.
    params
        Mapping providing the penalty weights under the keys ``'l1'``,
        ``'l2'`` and ``'tv'``.

    Returns
    -------
    The R2 score computed on the test set.
    """
    A_ij = tv.linear_operator_from_mesh(coord_ij, tria_ij)

    X_train, X_test, y_train, y_test = train_test_split(data_ij,
                                                        Y_ij,
                                                        shuffle=True,
                                                        random_state=1,
                                                        test_size=0.33)

    tv_reg = LinearRegressionL1L2TV(l1=params['l1'],
                                    l2=params['l2'],
                                    tv=params['tv'],
                                    A=A_ij)

    tv_reg.fit(X_train, y_train)

    y_train_pred = tv_reg.predict(X_train)
    y_test_pred = tv_reg.predict(X_test)

    mse_test = mean_squared_error(y_test, y_test_pred)
    # The test predictions live in ``y_test_pred``; the previous code
    # referenced an undefined ``y_test_pred_tv`` here (NameError).
    r2_test = r2_score(y_test, y_test_pred)
    print('TRAIN MSE TV: {}, TEST MSE TV: {}'.format(
        mean_squared_error(y_train, y_train_pred), mse_test))

    print('TRAIN R2 TV: {}, TEST R2 TV: {}\n'.format(
        r2_score(y_train, y_train_pred), r2_test))

    return r2_test
 def test_tvhelper_linear_operator_from_mesh(self):
     """Check the sparsity pattern of the operator built from a small mesh.

     For every matrix returned by ``linear_operator_from_mesh`` on a
     6-vertex / 4-triangle mesh, the column indices of the non-zero
     entries of each row are compared with the expected ones.
     """
     import parsimony.functions.nesterov.tv as tv_helper

     vertices = np.array([[0, 0], [1, 0], [0, 1], [1, 1], [0, 2], [1, 2]])
     faces = np.array([[0, 1, 3], [0, 2, 3], [2, 3, 5], [2, 4, 5]])
     operator = tv_helper.linear_operator_from_mesh(vertices, faces)

     # One list per matrix; inside it, one list of non-zero column
     # indices per row.
     observed = []
     for matrix in operator:
         dense_rows = matrix.toarray()
         observed.append([np.where(row)[0].tolist() for row in dense_rows])

     expected = [
         [[], [0, 1], [0, 2], [0, 3], [2, 4], [2, 5]],
         [[], [], [], [1, 3], [], [3, 5]],
         [[], [], [], [2, 3], [], [4, 5]],
     ]
     assert observed == expected
示例#3
0
def load_globals(config):
    """Populate the ``mapreduce`` module namespace from ``config``.

    Sets ``DATA`` (from ``config["data"]``), ``mesh_coord``,
    ``mesh_triangles`` and ``mask`` (from the paths stored under
    ``config["structure"]``), ``A`` (the TV linear operator built from the
    mesh and the mask) and ``CONFIG`` (the configuration itself).
    """
    import mapreduce as GLOBAL  # module used as a shared namespace

    GLOBAL.DATA = GLOBAL.load_data(config["data"])

    import brainomics.mesh_processing as mesh_utils

    structure = config["structure"]
    coord, triangles = mesh_utils.mesh_arrays(structure["mesh"])
    vertex_mask = np.load(structure["mask"])

    GLOBAL.mesh_coord = coord
    GLOBAL.mesh_triangles = triangles
    GLOBAL.mask = vertex_mask
    GLOBAL.A = tv_helper.linear_operator_from_mesh(coord, triangles,
                                                   vertex_mask)
    GLOBAL.CONFIG = config
示例#4
0
def load_globals(config):
    """Populate the ``mapreduce`` module namespace from ``config``.

    Sets ``DATA``, ``A`` (operator built from the array stored at
    ``config["structure"]``), ``STRUCTURE``, ``N_COMP``, ``Atv`` (operator
    built from the ``lrh.pial.gii`` mesh and ``mask.npy``) and
    ``FULL_RESAMPLE`` (from ``config['full_resample']``).
    """
    import mapreduce as GLOBAL  # access to global variables
    # Must be imported before its first use: an import statement anywhere in
    # the function makes ``tv_helper`` a local name for the WHOLE function,
    # so importing it only further down raised UnboundLocalError on the
    # ``A_from_mask`` call below.
    import parsimony.functions.nesterov.tv as tv_helper

    GLOBAL.DATA = GLOBAL.load_data(config["data"])
    STRUCTURE = np.load(config["structure"])
    A = tv_helper.A_from_mask(STRUCTURE)
    N_COMP = config["N_COMP"]
    GLOBAL.A, GLOBAL.STRUCTURE, GLOBAL.N_COMP = A, STRUCTURE, N_COMP

    # TV operator defined on the mesh, restricted by the mask.
    mesh_coord, mesh_triangles = mesh_utils.mesh_arrays(
        os.path.join(TEMPLATE_PATH, "lrh.pial.gii"))
    mask = np.load(os.path.join(INPUT_BASE_DIR, "mask.npy"))
    Atv = tv_helper.linear_operator_from_mesh(mesh_coord, mesh_triangles,
                                              mask=mask)
    GLOBAL.Atv = Atv
    GLOBAL.FULL_RESAMPLE = config['full_resample']
示例#5
0
# Persist the mask, then build, check and save the design matrix X, the
# target y and the TV linear operator Atv into OUTPUT.
np.save(os.path.join(OUTPUT, "mask.npy"), mask)

# Keep only the columns of Xtot selected by mask.
X = Xtot[:, mask]
assert X.shape == (280, 299806)

#############################################################################

# Prepend the columns of Z to X (3 extra columns according to the shape
# asserts: 299806 -> 299809).
X = np.hstack([Z, X])
assert X.shape == (280, 299809)
# Remove the rows whose target is NaN. The assert below still expects all
# 280 rows, i.e. it requires that no row was actually dropped.
X = X[np.logical_not(np.isnan(y)).ravel(), :]
y = y[np.logical_not(np.isnan(y))]
assert X.shape == (280, 299809)

np.save(os.path.join(OUTPUT, "X.npy"), X)
np.save(os.path.join(OUTPUT, "y.npy"), y)

#############################################################################
import parsimony.functions.nesterov.tv as nesterov_tv
from parsimony.utils.linalgs import LinearOperatorNesterov

# Build the TV linear operator from the mesh (cor, tri) and the mask.
Atv = nesterov_tv.linear_operator_from_mesh(cor,
                                            tri,
                                            mask,
                                            calc_lambda_max=True)
Atv.save(os.path.join(OUTPUT, "Atv.npz"))
# Round-trip check: reload the saved operator and compare the value returned
# by get_singular_values(0) with the in-memory one and with ~8.999.
Atv_ = LinearOperatorNesterov(filename=os.path.join(OUTPUT, "Atv.npz"))
assert Atv.get_singular_values(0) == Atv_.get_singular_values(0)
assert np.allclose(Atv_.get_singular_values(0), 8.999, rtol=1e-03, atol=1e-03)
# Every matrix of the operator must be square, with one row/column per
# masked column of Xtot (299806).
assert np.all([a.shape == (299806, 299806) for a in Atv])
# Register logistic-regression estimators with L1 + L2 + TV penalties in
# MODELS. The l1, l2, tv, Al1tv and algorithm_params names are defined
# outside this excerpt.
MODELS["2d_l1l2tv_inexactfista"] = \
    estimators.LogisticRegressionL1L2TVInexactFISTA(
        l1, l2, tv, Al1tv,
        algorithm_params=algorithm_params)

# Same estimator with penalty_start=1.
# NOTE(review): presumably this leaves the first column (an intercept)
# unpenalised, as the "inter" key suggests -- confirm against parsimony.
MODELS["2d_l1l2tv_inter_inexactfista"] = \
    estimators.LogisticRegressionL1L2TVInexactFISTA(
        l1, l2, tv, Al1tv,
        penalty_start=1,
        algorithm_params=algorithm_params)

## Get data structure from mesh

# Build a cylinder mesh with the same topology as the 2D grid
xyz, tri = mesh.cylinder(shape[1], shape[0])
Atvmesh = nesterov_tv.linear_operator_from_mesh(xyz, tri)

# Mesh-based TV operator, solved with CONESTA.
MODELS["mesh_l1l2tv_conesta"] = \
    estimators.LogisticRegressionL1L2TV(
        l1, l2, tv, Atvmesh,
        algorithm=algorithms.proximal.CONESTA(),
        algorithm_params=algorithm_params)

# Same model with penalty_start=1.
MODELS["mesh_l1l2tv_inter_conesta"] = \
    estimators.LogisticRegressionL1L2TV(
        l1, l2, tv, Atvmesh,
        penalty_start=1,
        algorithm=algorithms.proximal.CONESTA(),
        algorithm_params=algorithm_params)

# Operator built from the same mesh through the l1tv module.
Atvl1mesh = l1tv.linear_operator_from_mesh(xyz, tri)
# NOTE(review): the statements below repeat, with the same keys and the same
# arguments, the MODELS assignments found just above in this file; the later
# assignment wins.
# Register logistic-regression estimators with L1 + L2 + TV penalties in
# MODELS. The l1, l2, tv, Al1tv and algorithm_params names are defined
# outside this excerpt.
MODELS["2d_l1l2tv_inexactfista"] = \
    estimators.LogisticRegressionL1L2TVInexactFISTA(
        l1, l2, tv, Al1tv,
        algorithm_params=algorithm_params)

# Same estimator with penalty_start=1.
# NOTE(review): presumably this leaves the first column (an intercept)
# unpenalised, as the "inter" key suggests -- confirm against parsimony.
MODELS["2d_l1l2tv_inter_inexactfista"] = \
    estimators.LogisticRegressionL1L2TVInexactFISTA(
        l1, l2, tv, Al1tv,
        penalty_start=1,
        algorithm_params=algorithm_params)

## Get data structure from mesh

# Build a cylinder mesh with the same topology as the 2D grid
xyz, tri = mesh.cylinder(shape[1], shape[0])
Atvmesh = nesterov_tv.linear_operator_from_mesh(xyz, tri)

# Mesh-based TV operator, solved with CONESTA.
MODELS["mesh_l1l2tv_conesta"] = \
    estimators.LogisticRegressionL1L2TV(
        l1, l2, tv, Atvmesh,
        algorithm=algorithms.proximal.CONESTA(),
        algorithm_params=algorithm_params)

# Same model with penalty_start=1.
MODELS["mesh_l1l2tv_inter_conesta"] = \
    estimators.LogisticRegressionL1L2TV(
        l1, l2, tv, Atvmesh,
        penalty_start=1,
        algorithm=algorithms.proximal.CONESTA(),
        algorithm_params=algorithm_params)

# Operator built from the same mesh through the l1tv module.
Atvl1mesh = l1tv.linear_operator_from_mesh(xyz, tri)
示例#8
0
   
# Split one global penalty into TV, L1 and L2 weights.
global_pen = 0.1
tv_ratio =1e-05# 0.5
l1_ratio = 0.1

# tv_ratio is the share of global_pen given to TV; the remainder is divided
# between L1 and L2 according to l1_ratio.
ltv = global_pen * tv_ratio
ll1 = l1_ratio * global_pen * (1 - tv_ratio)
ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
# The three weights must add up to the global penalty.
assert(np.allclose(ll1 + ll2 + ltv, global_pen))



# Build the TV linear operator from the template mesh, restricted by the mask.
mesh_coord, mesh_triangles = mesh_utils.mesh_arrays(os.path.join(TEMPLATE_PATH, "lrh.pial.gii"))
mask = np.load(os.path.join(INPUT_BASE_DIR, "mask.npy"))
import parsimony.functions.nesterov.tv as tv_helper
Atv = tv_helper.linear_operator_from_mesh(mesh_coord, mesh_triangles, mask=mask)


# PARSIMONY
########################################
from parsimony.algorithms.utils import AlgorithmSnapshot
# Bound method of a snapshot object configured with an output directory and
# saving_period=1.
# NOTE(review): presumably used as a callback by the solver -- confirm where
# `snapshot` is consumed.
snapshot = AlgorithmSnapshot('/neurospin/brainomics/2014_pca_struct/adni/adni_time/enet_1e-6/',saving_period=1).save_conesta

mod = pca_tv.PCA_L1_L2_TV(n_components=3,
                                l1=ll1, l2=ll2, ltv=ltv,
                                Atv=Atv,
                                criterion="frobenius",
                                eps=1e-6,
                                inner_eps=1e-1,
                                max_iter=100,
                                inner_max_iter=int(1e4),
示例#9
0
def _outer_cv_folds(y):
    """Return the outer stratified folds, checked against the stored ones."""
    # shuffle keeps its default (False): the folds are deterministic and a
    # random_state would have no effect. Recent scikit-learn releases reject
    # random_state when shuffle is False, so it is not passed.
    splitter = StratifiedKFold(n_splits=NFOLDS_OUTER)
    cv_outer = [[tr, te] for tr, te in
                splitter.split(np.zeros(y.shape[0]), y.ravel())]

    # Check we got the same CV as previously.
    with open(os.path.join(WD_ORIGINAL, "config_modselectcv.json")) as fd:
        cv_old = json.load(fd)["resample"]
    cv_outer_old = [cv_old['cv%02d/refit' % i] for i in range(NFOLDS_OUTER)]
    assert np.all([np.all(np.array(cv_outer_old[i][0]) == cv_outer[i][0])
                   for i in range(NFOLDS_OUTER)])
    assert np.all([np.all(np.array(cv_outer_old[i][1]) == cv_outer[i][1])
                   for i in range(NFOLDS_OUTER)])
    return cv_outer


def _write_config_and_jobs(params, cv, name):
    """Dump ``config_<name>.json`` into WD and build the matching job files."""
    config = dict(data=dict(X="X.npy", y="y.npy"),
                  params=params, resample=cv,
                  structure_linear_operator_tv="Atv.npz",
                  beta_start="beta_start.npz",
                  map_output="5cv",
                  user_func=user_func_filename)
    # Context manager so the file is flushed and closed deterministically.
    with open(os.path.join(WD, "config_%s.json" % name), "w") as fd:
        json.dump(config, fd)

    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    cmd = "mapreduce.py --map  %s/config_%s.json" % (WD_CLUSTER, name)
    clust_utils.gabriel_make_qsub_job_files(WD, cmd, walltime="250:00:00",
                                            suffix="_%s" % name,
                                            freecores=2)


def init():
    """Prepare the working directory WD and write the mapreduce configs.

    Steps, in order:

    1. copy ``X.npy``, ``y.npy``, ``mask.npy`` from WD_ORIGINAL and the mesh
       file into WD, then change the current directory to WD;
    2. build, save and check the TV linear operator ``Atv.npz`` if absent;
    3. fit ridge logistic regressions and save their ``beta`` in
       ``beta_start.npz`` if absent;
    4. write ``config_cv_largerange.json`` (outer CV, large parameter grid)
       and ``config_dcv_reducedrange.json`` (nested CV, reduced grid),
       together with their cluster job files.
    """
    import collections

    INPUT_DATA_X = os.path.join(WD_ORIGINAL, 'X.npy')
    INPUT_DATA_y = os.path.join(WD_ORIGINAL, 'y.npy')
    INPUT_MASK_PATH = os.path.join(WD_ORIGINAL, 'mask.npy')
    INPUT_MESH_PATH = '/neurospin/brainomics/2013_adni/MCIc-CTL-FS_cs/lrh.pial.gii'

    os.makedirs(WD, exist_ok=True)
    for src in (INPUT_DATA_X, INPUT_DATA_y, INPUT_MASK_PATH, INPUT_MESH_PATH):
        shutil.copy(src, WD)

    os.chdir(WD)
    X = np.load("X.npy")
    y = np.load("y.npy")

    if not os.path.exists(os.path.join(WD, "Atv.npz")):
        import brainomics.mesh_processing as mesh_utils
        cor, tri = mesh_utils.mesh_arrays(os.path.join(WD, "lrh.pial.gii"))
        mask = np.load(os.path.join(WD, 'mask.npy'))

        import parsimony.functions.nesterov.tv as nesterov_tv
        from parsimony.utils.linalgs import LinearOperatorNesterov
        Atv = nesterov_tv.linear_operator_from_mesh(cor, tri, mask, calc_lambda_max=True)
        Atv.save(os.path.join(WD, "Atv.npz"))
        # Round-trip check of the saved operator.
        Atv_ = LinearOperatorNesterov(filename=os.path.join(WD, "Atv.npz"))
        assert Atv.get_singular_values(0) == Atv_.get_singular_values(0)
        assert np.allclose(Atv_.get_singular_values(0), 8.999, rtol=1e-03, atol=1e-03)
        assert np.all([a.shape == (317089, 317089) for a in Atv])

    if not os.path.exists(os.path.join(WD, "beta_start.npz")):
        betas = dict()
        import time
        alphas = [.01, 0.1, 1.0, 10]
        for alpha in alphas:
            mod = estimators.RidgeLogisticRegression(l=alpha, class_weight="auto", penalty_start=penalty_start)
            t_ = time.time()
            mod.fit(X, y.ravel())
            print(time.time() - t_)  # 11564
            betas["lambda_%.2f" % alpha] = mod.beta

        np.savez(os.path.join(WD, "beta_start.npz"), **betas)
        # Round-trip check; the context manager closes the npz archive.
        with np.load(os.path.join(WD, "beta_start.npz")) as beta_start:
            assert np.all([np.all(beta_start[a] == betas[a]) for a in beta_start.keys()])

    #  ########################################################################
    #  Setting 1: 5cv + large range of parameters: cv_largerange
    #  with sub-sample training set with size 50, 100
    # 5cv/cv0*[_sub50]/refit/*

    # sub_sizes = [50, 100]
    sub_sizes = []

    cv_outer = _outer_cv_folds(y)

    cv = collections.OrderedDict()
    cv["refit/refit"] = [np.arange(len(y)), np.arange(len(y))]

    for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
        # Simple CV
        cv["cv%02d/refit" % (cv_outer_i)] = [tr_val, te]

        # Sub-sample training set with size 50, 100
        # => cv*_sub[50|100]/refit
        grps = np.unique(y[tr_val]).astype(int)
        ytr = y.copy()
        ytr[te] = np.nan  # hide the test samples from the group lookup
        g_idx = [np.where(ytr == g)[0] for g in grps]
        assert np.all([np.all(ytr[g_idx[g]] == g) for g in grps])

        g_size = np.array([len(g) for g in g_idx])
        g_prop = g_size / g_size.sum()

        for sub_size in sub_sizes:
            sub_g_size = np.round(g_prop * sub_size).astype(int)
            g_sub_idx = [np.random.choice(g_idx[g], sub_g_size[g], replace=False) for g in grps]
            assert np.all([np.all(y[g_sub_idx[g]] == g) for g in grps])
            tr_val_sub = np.concatenate(g_sub_idx)
            assert len(tr_val_sub) == sub_size
            assert np.all([idx in tr_val for idx in tr_val_sub])
            assert np.all(np.logical_not([idx in te for idx in tr_val_sub]))
            cv["cv%02d_sub%i/refit" % (cv_outer_i, sub_size)] = [tr_val_sub, te]

    # Index arrays -> plain lists so the config is JSON serialisable.
    cv = {k: [cv[k][0].tolist(), cv[k][1].tolist()] for k in cv}

    # Simple CV + sub-sample training set with size 50, 100:
    assert len(cv) == NFOLDS_OUTER * (1 + len(sub_sizes)) + 1

    print(list(cv.keys()))

    # Large grid of parameters
    alphas = [0.001, 0.01, 0.1, 1.0]
    # alphas = [.01, 0.1, 1.0] # first ran with this grid
    tv_ratio = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    l1l2_ratio = [0.1, 0.5, 0.9]
    # l1l2_ratio = [0, 0.1, 0.5, 0.9, 1.0] # first ran with this grid
    algos = ["enettv", "enetgn"]
    params_enet_tvgn = [list(param) for param in itertools.product(algos, alphas, l1l2_ratio, tv_ratio)]
    assert len(params_enet_tvgn) == 240  # old 300

    params_enet = [list(param) for param in itertools.product(["enet"], alphas, l1l2_ratio, [0])]
    assert len(params_enet) == 12  # old 15

    params = params_enet_tvgn + params_enet
    assert len(params) == 252  # 315

    # Simple CV + sub-sample training set with size 50, 100:
    assert len(params) * len(cv) == 1512  # 5040

    _write_config_and_jobs(params, cv, "cv_largerange")

    #  ########################################################################
    #  Setting 2: dcv + reduced range of parameters: dcv_reducedrange
    #  5cv/cv0*/cvnested0*/*

    # The outer folds are deterministic and were already checked against the
    # stored ones above, so cv_outer is reused as is.
    cv = collections.OrderedDict()
    cv["refit/refit"] = [np.arange(len(y)), np.arange(len(y))]

    for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
        cv["cv%02d/refit" % (cv_outer_i)] = [tr_val, te]
        # Inner folds index into tr_val, hence the tr_val[...] mapping below.
        cv_inner = StratifiedKFold(n_splits=NFOLDS_INNER).split(
            np.zeros(y[tr_val].shape[0]), y[tr_val].ravel())
        for cv_inner_i, (tr, val) in enumerate(cv_inner):
            cv["cv%02d/cvnested%02d" % ((cv_outer_i), cv_inner_i)] = [tr_val[tr], tr_val[val]]

    cv = {k: [cv[k][0].tolist(), cv[k][1].tolist()] for k in cv}
    assert len(cv) == NFOLDS_OUTER * NFOLDS_INNER + NFOLDS_OUTER + 1
    print(list(cv.keys()))

    # Reduced grid of parameters
    alphas = [0.001, 0.01, 0.1, 1.0]
    # alphas = [.01, 0.1] # original
    tv_ratio = [0.2, 0.8]
    l1l2_ratio = [0.1, 0.9]
    algos = ["enettv", "enetgn"]
    params_enet_tvgn = [list(param) for param in itertools.product(algos, alphas, l1l2_ratio, tv_ratio)]
    assert len(params_enet_tvgn) == 32  # 16

    params_enet = [list(param) for param in itertools.product(["enet"], alphas, l1l2_ratio, [0])]
    assert len(params_enet) == 8  # 4

    params = params_enet_tvgn + params_enet
    assert len(params) == 40  # 20
    assert len(params) * len(cv) == 1240  # 620

    _write_config_and_jobs(params, cv, "dcv_reducedrange")