Пример #1
0
def test_generators(N=100, testpct=0.2, nchunks=5, nfolds=5):
    """Check the folds yielded by ``tikutils.generate_trnval_folds``.

    The ``'cv'``, ``'nbb'`` and ``'mbb'`` samplers are exercised in turn.
    For every fold, the training and validation indices must not overlap.
    The remaining assertions pin the expected training-set size and, for
    ``'nbb'`` and ``'mbb'``, the number of folds produced.

    Parameters
    ----------
    N : int
        Number of samples to split.
    testpct : float
        Forwarded to the sampler as ``testpct``; also used to compute the
        expected training-set size for ``'nbb'`` and ``'mbb'``.
    nchunks : int
        Forwarded to the sampler as ``nchunks``; also the tolerance on the
        training-set size.
    nfolds : int
        Number of folds requested from the ``'cv'`` sampler. ``'nbb'`` and
        ``'mbb'`` are always asked for 10 and 100 folds respectively.
    """

    def check_disjoint(trn, val):
        # No sample may appear in both the training and validation splits.
        assert np.isin(trn, val).sum() == 0
        assert np.isin(val, trn).sum() == 0

    # 'cv' sampler: the expected training size is derived from ``nfolds``.
    ntest = int(N * (1. / nfolds))
    ntrain = N - ntest
    folds = tikutils.generate_trnval_folds(N,
                                           'cv',
                                           testpct=testpct,
                                           nfolds=nfolds,
                                           nchunks=nchunks)
    for trn, val in folds:
        check_disjoint(trn, val)
        # Together the two splits must cover every sample exactly.
        assert len(np.unique(np.r_[val, trn])) == N
        assert ntrain + nchunks >= len(trn) >= ntrain - nchunks

    # 'nbb' / 'mbb' samplers: the expected training size is derived from
    # ``testpct`` and may fall short by the chunk remainder or a full chunk.
    ntest = int(N * testpct)
    ntrain = int(np.ceil(N - ntest))
    remainder = np.mod(ntrain, nchunks)
    allowed_sizes = (ntrain - remainder, ntrain - nchunks)
    for sampler, nboot in (('nbb', 10), ('mbb', 100)):
        folds = tikutils.generate_trnval_folds(N,
                                               sampler,
                                               nfolds=nboot,
                                               testpct=testpct,
                                               nchunks=nchunks)
        # Count explicitly so that an empty iterator fails the assertion
        # below instead of relying on a stale/unbound loop variable.
        nyielded = 0
        for nyielded, (trn, val) in enumerate(folds, start=1):
            check_disjoint(trn, val)
            assert len(trn) in allowed_sizes
        assert nyielded == nboot
Пример #2
0
def test_hyperopt_crossval():
    """Smoke-test ``models.hyperopt_crossval_stem_wmvnp`` end to end.

    Runs 100 hyperopt trials over three spatial hyper-parameters and one
    temporal hyper-parameter, prints the elapsed wall-clock time, and then
    checks that the ``'internals'`` attachment of the best trial can be
    unpickled. There are no explicit assertions: the test passes when
    nothing raises.
    """
    import pickle
    import time

    from hyperopt import hp
    from tikreg import models

    delays = np.arange(10)

    features_train, _, responses_train, _ = get_abc_data()

    feature_priors = [sps.SphericalPrior(fs) for fs in features_train]
    temporal_prior = tps.SmoothnessPrior(delays,
                                         hhparams=np.linspace(0, 10, 5))

    # Materialise the folds so the same splits can be reused across trials.
    folds = list(
        tikutils.generate_trnval_folds(
            responses_train.shape[0],
            sampler='bcv',
            nfolds=(1, 5),
        ))

    start_time = time.time()
    cvresults = models.hyperopt_crossval_stem_wmvnp(
        features_train,
        responses_train,
        temporal_prior=temporal_prior,
        feature_priors=feature_priors,
        spatial_sampler=[
            hp.loguniform('A', 0, 7),
            hp.loguniform('B', 0, 7),
            hp.loguniform('C', 0, 7),
        ],
        ridge_sampler=False,
        temporal_sampler=hp.uniform('temporal', 0, 10),
        ntrials=100,
        method='Chol',
        verbosity=2,
        folds=folds,
    )
    print(time.time() - start_time)

    best_trial = cvresults.trials[cvresults.best_trial['tid']]
    internal_best = cvresults.trial_attachments(best_trial)['internals']
    # NOTE(review): pickle.loads is only acceptable here because the payload
    # is presumably written by the call above in this same process; never
    # unpickle attachments that come from an untrusted source.
    pickle.loads(internal_best)
Пример #3
0
def test_hyperopt_functionality():
    """Smoke-test driving ``models.crossval_stem_wmvnp`` from hyperopt.

    A hand-written objective sets the hyper-parameters of the second and
    third feature priors and the ridge scale from the sampled point,
    cross-validates, and returns ``(1 - mean_cv_performance) ** 2``.
    ``hyperopt.fmin`` minimises that objective for 100 evaluations using
    ``tpe.suggest``. The best parameters are printed; there are no explicit
    assertions.
    """
    from hyperopt import fmin, tpe, hp, Trials

    delays = np.arange(5)

    features_train, _, responses_train, _ = get_abc_data()
    features_sizes = [fs.shape[1] for fs in features_train]

    # One spherical prior for each of the first three feature spaces.
    feature_priors = [
        sps.SphericalPrior(size) for size in features_sizes[:3]
    ]
    temporal_prior = tps.SphericalPrior(delays)

    def objective(params):
        # The last sampled value is the ridge scale; the ones before it are
        # the hyper-parameters of every feature prior except the first.
        feature_hyparams = params[:-1]
        scale_hyparams = params[-1]

        temporal_prior.set_hhparameters(1.)

        for feature_prior, hyparam in zip(feature_priors[1:],
                                          feature_hyparams):
            feature_prior.set_hyparams(hyparam)

        # The first feature prior is held fixed at 1.
        feature_priors[0].set_hyparams(1.)

        # Folds are requested from the model as ``(2, 5)`` on every call
        # rather than being precomputed.
        res = models.crossval_stem_wmvnp(
            features_train,
            responses_train,
            ridges=np.asarray([scale_hyparams]),
            normalize_kernel=False,
            temporal_prior=temporal_prior,
            feature_priors=feature_priors,
            folds=(2, 5),
            method='SVD',
            verbosity=2,
        )
        cvres = res['cvresults'].mean(0).mean(-1).mean()
        print('features:', feature_hyparams)
        print('ridges:', scale_hyparams)
        print(res['spatial'], res['temporal'], res['ridges'])
        print(cvres)
        return (1 - cvres)**2

    space = (
        hp.loguniform('rB', 0, 7),
        hp.loguniform('rC', 0, 7),
        hp.loguniform('ridge', -7, 7),
    )

    ntrials = 100
    trials = Trials()

    best_params = fmin(objective,
                       space=space,
                       algo=tpe.suggest,
                       max_evals=ntrials,
                       trials=trials)

    print(best_params)
Пример #4
0
def test_fullfit(n=100, p=50, population_optimal=False):
    """Check ``models.estimate_stem_wmvnp`` against a manual per-response fit.

    The full estimator is run once. Then, for every response, the kernels
    are rebuilt from the optimal hyper-parameters it reports, the problem is
    solved with ``models.solve_l2_dual``, and every entry of that solution
    is compared with the matching column of the estimator's output.

    Parameters
    ----------
    n, p : int
        Forwarded to ``get_abc_data`` as ``n`` and ``p``.
    population_optimal : bool
        Forwarded to ``models.estimate_stem_wmvnp``. When true, a single set
        of optimal hyper-parameters is expected and reused for all responses.
    """
    ridges = np.logspace(-3, 3, 10)
    ndelays = 5
    delays = range(ndelays)

    features_train, features_test, responses_train, responses_test = \
        get_abc_data(banded=True, n=n, p=p)
    features_sizes = [fs.shape[1] for fs in features_train]

    hyparams = np.logspace(0, 3, 5)
    spatial_priors = [
        sps.SphericalPrior(features_sizes[0], hyparams=[1.]),
        sps.SphericalPrior(features_sizes[1], hyparams=hyparams),
        sps.SphericalPrior(features_sizes[2], hyparams=hyparams),
    ]

    temporal_prior = tps.SphericalPrior(delays)

    # Folds are requested from the estimator as ``(1, 5)``.
    res = models.estimate_stem_wmvnp(
        features_train,
        responses_train,
        features_test,
        responses_test,
        ridges=ridges,
        normalize_kernel=True,
        temporal_prior=temporal_prior,
        feature_priors=spatial_priors,
        weights=True,
        performance=True,
        predictions=True,
        population_optimal=population_optimal,
        folds=(1, 5),
        method='SVD',
        verbosity=1,
        cvresults=None,
    )

    for rdx in range(responses_train.shape[-1]):
        if population_optimal:
            assert res['optima'].shape[0] == 1
            optima = res['optima'][0]
        else:
            optima = res['optima'][rdx]

        # Layout of one optimum: temporal, spatial..., ridge scale.
        temporal_opt = optima[0]
        spatial_opt = optima[1:-1]
        ridge_scale = optima[-1]

        Ktrain = 0.
        Ktest = 0.
        this_temporal_prior = temporal_prior.get_prior(hhparam=temporal_opt)
        for fs_train, fs_test, fs_prior, fs_hyper in zip(
                features_train, features_test, spatial_priors, spatial_opt):
            Ktrain += models.kernel_spatiotemporal_prior(
                fs_train,
                this_temporal_prior,
                fs_prior.get_prior(fs_hyper),
                delays=temporal_prior.delays)

            if fs_test is not None:
                Ktest += models.kernel_spatiotemporal_prior(
                    fs_train,
                    this_temporal_prior,
                    fs_prior.get_prior(fs_hyper),
                    delays=temporal_prior.delays,
                    Xtest=fs_test)

        # Nothing was accumulated (or it is all zeros): pass no test kernel.
        if np.allclose(Ktest, 0.0):
            Ktest = None

        # solve for this response only
        response_solution = models.solve_l2_dual(Ktrain,
                                                 responses_train[:, [rdx]],
                                                 Ktest=Ktest,
                                                 Ytest=responses_test[:,
                                                                      [rdx]],
                                                 ridges=[ridge_scale],
                                                 performance=True,
                                                 predictions=True,
                                                 weights=True,
                                                 verbose=1,
                                                 method='SVD')

        # compare each output with the matching column of the full fit
        for key, manual in response_solution.items():
            assert np.allclose(res[key][:, rdx].squeeze(), manual.squeeze())
Пример #5
0
def test_cv_api(show_figures=False, ntest=50):
    """Exercise the low-level cross-validation building blocks by hand.

    For each temporal prior, every combination of temporal and spatial
    hyper-parameters is enumerated. For the first ``ntest + 1``
    combinations the spatio-temporal training kernel is built and
    normalised, and ``models.solve_l2_dual`` is cross-validated over the
    folds. Performances are stored in a per-prior ``results`` array; there
    are no explicit assertions.

    Parameters
    ----------
    show_figures : bool
        If true, create images of the temporal priors and show the feature
        prior hyper-parameters in 3D.
    ntest : int
        Combinations whose index is greater than ``ntest`` are not fitted.
    """
    if show_figures:
        from matplotlib import pyplot as plt
        from tikreg import priors

    ridges = [0., 1e-03, 1., 10.0, 100.]
    nridges = len(ridges)
    ndelays = 10
    delays = range(ndelays)

    features_train, features_test, responses_train, responses_test = get_abc_data(
    )
    features_sizes = [fs.shape[1] for fs in features_train]

    spatial_priors = [
        sps.SphericalPrior(features_sizes[0]),
        sps.SphericalPrior(features_sizes[1], hyparams=np.logspace(-3, 3, 7)),
        sps.SphericalPrior(features_sizes[2], hyparams=np.logspace(-3, 3, 7)),
    ]

    # do not scale first. this removes duplicates
    spatial_priors[0].set_hyparams(1.0)

    # non-diagonal hyper-prior
    W = np.random.randn(ndelays, ndelays)
    W = np.dot(W.T, W)

    tpriors = [
        tps.SphericalPrior(delays),
        tps.SmoothnessPrior(delays, hhparams=np.logspace(-3, 1, 8)),
        tps.SmoothnessPrior(delays, wishart=True),
        tps.SmoothnessPrior(delays, wishart=False),
        tps.SmoothnessPrior(delays, wishart=W, hhparams=np.logspace(-3, 3, 5)),
        tps.GaussianKernelPrior(delays,
                                hhparams=np.linspace(1, ndelays / 2, ndelays)),
        # A ``range`` never compares equal to a list on Python 3, so convert
        # before comparing.
        tps.HRFPrior([1] if list(delays) == [0] else delays),
    ]

    nfolds = (1, 5)  # 1 times 5-fold cross-validation
    # Materialise the folds, as the other tests in this file do: they are
    # iterated once per hyper-parameter combination, and a one-shot iterator
    # would be exhausted after the first combination.
    folds = list(
        tikutils.generate_trnval_folds(responses_train.shape[0],
                                       sampler='bcv',
                                       nfolds=nfolds))
    nfolds = np.prod(nfolds)

    for ntp, temporal_prior in enumerate(tpriors):
        print(temporal_prior)

        all_temporal_hypers = [temporal_prior.get_hhparams()]
        all_spatial_hypers = [t.get_hyparams() for t in spatial_priors]

        # get all combinations of hyparams; the temporal one comes first
        all_hyperparams = list(
            itertools.product(*(all_temporal_hypers + all_spatial_hypers)))
        nspatial_hyperparams = np.prod([len(t) for t in all_spatial_hypers])
        ntemporal_hyperparams = np.prod([len(t) for t in all_temporal_hypers])

        population_mean = False
        results = np.zeros(
            (nfolds, ntemporal_hyperparams, nspatial_hyperparams, nridges,
             1 if population_mean else responses_train.shape[-1]),
            dtype=[
                ('fold', np.float32),
                ('tp', np.float32),
                ('sp', np.float32),
                ('ridges', np.float32),
                ('responses', np.float32),
            ])

        for hyperidx, spatiotemporal_hyperparams in enumerate(all_hyperparams):
            temporal_hyperparam = spatiotemporal_hyperparams[0]
            # project the spatial hyper-parameters onto the unit sphere
            spatial_hyperparams = np.asarray(spatiotemporal_hyperparams[1:])
            spatial_hyperparams = (spatial_hyperparams /
                                   np.linalg.norm(spatial_hyperparams))

            # get indices into the temporal / spatial axes of ``results``
            shyperidx = np.mod(hyperidx, nspatial_hyperparams)
            thyperidx = int(hyperidx // nspatial_hyperparams)
            print((thyperidx, temporal_hyperparam),
                  (shyperidx, spatial_hyperparams))

            this_temporal_prior = temporal_prior.get_prior(
                alpha=1.0, hhparam=temporal_hyperparam)

            if show_figures:
                if (hyperidx == 0) and (ntp == 0):
                    # show points in 3D
                    cartesian_points = [
                        t[1:] / np.linalg.norm(t[1:]) for t in all_hyperparams
                    ]
                    angles = priors.cartesian2polar(
                        np.asarray(cartesian_points))
                    priors.show_spherical_angles(angles[:, 0], angles[:, 1])

                if hyperidx == 0:
                    # show priors with different hyper-priors
                    oldthyper = 0
                    plt.matshow(this_temporal_prior, cmap='inferno')
                elif thyperidx > oldthyper:
                    # a new temporal hyper-parameter: show its prior once
                    oldthyper = thyperidx
                    plt.matshow(this_temporal_prior, cmap='inferno')

            # only run a few
            if hyperidx > ntest:
                continue

            Ktrain = 0.
            for fs_train, _fs_test, fs_prior, fs_hyper in zip(
                    features_train, features_test, spatial_priors,
                    spatial_hyperparams):
                Ktrain += models.kernel_spatiotemporal_prior(
                    fs_train,
                    this_temporal_prior,
                    fs_prior.get_prior(fs_hyper),
                    delays=delays)

            kernel_normalizer = tikutils.determinant_normalizer(Ktrain)
            Ktrain /= float(kernel_normalizer)

            # cross-validation
            for ifold, (trnidx, validx) in enumerate(folds):
                ktrn = tikutils.fast_indexing(Ktrain, trnidx, trnidx)
                kval = tikutils.fast_indexing(Ktrain, validx, trnidx)

                fit = models.solve_l2_dual(ktrn,
                                           responses_train[trnidx],
                                           kval,
                                           responses_train[validx],
                                           ridges=ridges,
                                           verbose=False,
                                           performance=True)
                if population_mean:
                    cvfold = np.nan_to_num(fit['performance']).mean(-1)[...,
                                                                        None]
                else:
                    cvfold = fit['performance']
                results[ifold, thyperidx, shyperidx] = cvfold
Пример #6
0
def test_general_solution(temporal_prior_name='spherical'):
    """Check the estimator against closed-form dual and primal solutions.

    The model is cross-validated, then refit with
    ``models.estimate_simple_stem_wmvnp`` using the first spatial
    hyper-parameter set and the first ridge value reported by the
    cross-validation. The resulting kernel weights, and the primal weights
    from ``models.dual2primal_weights``, are compared with solutions
    computed directly with NumPy from the delayed design matrix and the
    block-diagonal prior.

    Parameters
    ----------
    temporal_prior_name : str
        One of ``'spherical'``, ``'smooth'``, ``'hrf'`` or ``'gaussian'``;
        selects which temporal prior is tested. Any other value raises
        ``ValueError`` from ``list.index``.
    """
    from scipy import linalg as LA

    tprior_names = ['spherical', 'smooth', 'hrf', 'gaussian']
    normalize_kernel = False
    method = 'SVD'

    # make sure we can recover the ridge solution
    ridges = np.round(np.logspace(1, 3, 5), 4)
    delays = range(10)

    features_train, features_test, responses_train, responses_test = get_abc_data(
    )
    features_sizes = [fs.shape[1] for fs in features_train]

    # custom effective low-rank prior
    a = np.random.randn(features_train[-1].shape[-1], 3)
    sigma_x = np.dot(a, a.T) + np.identity(a.shape[0])
    spatial_priors = [
        sps.SphericalPrior(features_sizes[0], hyparams=[1]),
        sps.SphericalPrior(features_sizes[1], hyparams=[0.1, 1]),
        sps.CustomPrior(sigma_x, hyparams=[0.1, 1])
    ]

    # same order as ``tprior_names``
    tpriors = [
        tps.SphericalPrior(delays),
        tps.SmoothnessPrior(delays, wishart=False),
        tps.HRFPrior(delays),
        tps.GaussianKernelPrior(delays),
    ]
    temporal_prior = tpriors[tprior_names.index(temporal_prior_name)]

    folds = list(
        tikutils.generate_trnval_folds(
            responses_train.shape[0],
            sampler='bcv',
            nfolds=(1, 5),
        ))
    res = models.crossval_stem_wmvnp(
        features_train,
        responses_train,
        temporal_prior=temporal_prior,
        feature_priors=spatial_priors,
        folds=folds,
        ridges=ridges,
        verbosity=2,
        method=method,
        normalize_kernel=normalize_kernel,
    )

    # use the first spatial hyper-parameter set and the first ridge
    spidx = 0
    sprior_ridge = res['spatial'][spidx]
    ridge_scale = res['ridges'][0]

    res = models.estimate_simple_stem_wmvnp(
        features_train,
        responses_train,
        features_test=None,
        responses_test=None,
        temporal_prior=temporal_prior,
        temporal_hhparam=1.0,
        feature_priors=spatial_priors,
        feature_hyparams=sprior_ridge,
        weights=True,
        performance=False,
        predictions=False,
        ridge_scale=ridge_scale,
        verbosity=2,
        method='SVD',
    )

    weights = models.dual2primal_weights(
        res['weights'],
        features_train,
        spatial_priors,
        sprior_ridge,
        temporal_prior,
    )
    weights = np.vstack(weights)

    ### solve problem directly
    Xx = np.hstack([
        tikutils.delay_signal(t.astype(np.float64), delays)
        for t in features_train
    ])

    # get scaled priors
    spriors = [
        sp.get_prior(param) for sp, param in zip(spatial_priors, sprior_ridge)
    ]
    # get temporal prior, with a small diagonal jitter added so that the
    # full prior can be inverted below
    tprior = temporal_prior.get_prior(1.0)
    tprior += np.identity(tprior.shape[0]) * 1e-10
    # combine: one kron(temporal, spatial) block per feature space
    prior = LA.block_diag(*[np.kron(tprior, spr) for spr in spriors])

    # solve problem indirectly # dual
    XSigmaXT = np.linalg.multi_dot(
        [Xx, prior, Xx.T]) + (ridge_scale**2.0) * np.identity(Xx.shape[0])
    alphas = np.dot(np.linalg.inv(XSigmaXT), responses_train)
    assert np.allclose(alphas, res['weights'])
    betas_dual = np.linalg.multi_dot([prior, Xx.T, alphas])
    assert np.allclose(betas_dual, weights)

    # solve problem directly # primal
    penalty = np.linalg.inv(prior)
    XTXSigma = np.dot(Xx.T, Xx) + (ridge_scale**2.0) * penalty
    XTY = np.dot(Xx.T, responses_train)
    betas = np.dot(np.linalg.inv(XTXSigma), XTY)

    # check solutions
    if not np.allclose(betas, weights):
        # numerical error with HRF because of rank: fall back to requiring
        # a correlation of 1 between the two solutions
        print('asserting correlation')
        assert np.allclose(
            np.corrcoef(betas.ravel(), weights.ravel())[0, 1], 1.0)
Пример #7
0
def test_ridge_solution(normalize_kernel=True, method='SVD'):
    """Check that the model reduces to ridge regression with spherical priors.

    With spherical spatial and temporal priors, the cross-validation
    results, kernel weights and primal weights of the model are compared
    against ``models.cvridge`` / ``models.solve_l2_primal`` on a rescaled
    delayed design matrix, and against a closed-form Tikhonov solution
    computed with NumPy.

    Parameters
    ----------
    normalize_kernel : bool
        Forwarded to ``models.crossval_stem_wmvnp``.
    method : str
        Forwarded to ``models.crossval_stem_wmvnp`` as ``method``.
    """
    from scipy import linalg as LA

    # make sure we can recover the ridge solution
    ridges = np.round(np.logspace(-3, 3, 5), 4)

    # a random, sorted subset of the delays 0..9
    delays = np.unique(np.random.randint(0, 10, 10))

    features_train, features_test, responses_train, responses_test = get_abc_data(
    )
    features_sizes = [fs.shape[1] for fs in features_train]

    spatial_priors = [
        sps.SphericalPrior(features_sizes[0], hyparams=[1]),
        sps.SphericalPrior(features_sizes[1], hyparams=[0.1, 1]),
        sps.SphericalPrior(features_sizes[2], hyparams=[0.1, 1]),
    ]

    temporal_prior = tps.SphericalPrior(delays)
    folds = list(
        tikutils.generate_trnval_folds(
            responses_train.shape[0],
            sampler='bcv',
            nfolds=(1, 5),
        ))
    res = models.crossval_stem_wmvnp(
        features_train,
        responses_train,
        temporal_prior=temporal_prior,
        feature_priors=spatial_priors,
        folds=folds,
        ridges=ridges,
        verbosity=2,
        method=method,
        normalize_kernel=normalize_kernel,
    )

    # use the first spatial hyper-parameter set and the last ridge
    spidx = 0
    sprior_ridge = res['spatial'][spidx]
    newridges = res['ridges']
    ridge_scale = newridges[-1]

    # direct fit: each delayed feature space is divided by its
    # hyper-parameter
    X = np.hstack([
        tikutils.delay_signal(t.astype(np.float64), delays) *
        (sprior_ridge[i]**-1) for i, t in enumerate(features_train)
    ])

    fit = models.cvridge(
        X,
        responses_train,
        folds=folds,
        ridges=newridges,
        verbose=True,
    )

    print(newridges)
    print(res['spatial'].squeeze())
    print(res['ridges'].squeeze())
    assert np.allclose(fit['cvresults'].squeeze(),
                       res['cvresults'].squeeze()[:, spidx])

    fit = models.cvridge(
        X,
        responses_train,
        folds=folds,
        ridges=[ridge_scale],
        verbose=True,
        weights=True,
        kernel_weights=True,
    )

    res = models.estimate_simple_stem_wmvnp(
        features_train,
        responses_train,
        features_test=None,
        responses_test=None,
        temporal_prior=temporal_prior,
        temporal_hhparam=1.0,
        feature_priors=spatial_priors,
        feature_hyparams=sprior_ridge,
        weights=True,
        performance=False,
        predictions=False,
        ridge_scale=ridge_scale,
        verbosity=2,
        method='SVD',
    )

    # check kernel weights are the same
    assert np.allclose(res['weights'].squeeze(), fit['weights'].squeeze())

    primal = models.solve_l2_primal(X,
                                    responses_train,
                                    ridges=[ridge_scale],
                                    weights=True)

    # check projection from kernel to standard form solution is correct
    W = np.dot(X.T, res['weights'])
    assert np.allclose(W, primal['weights'])

    # check projection from standard solution to tikhonov solution is correct
    weights = models.dual2primal_weights(
        res['weights'],
        features_train,
        spatial_priors,
        sprior_ridge,
        temporal_prior,
    )
    weights = np.vstack(weights)

    ### solve problem directly, on the unscaled delayed design matrix
    Xx = np.hstack([
        tikutils.delay_signal(t.astype(np.float64), delays)
        for t in features_train
    ])

    # get scaled priors
    spriors = [
        sp.get_prior(param) for sp, param in zip(spatial_priors, sprior_ridge)
    ]
    # combine
    sprior = LA.block_diag(*spriors)
    # get temporal prior
    tprior = temporal_prior.get_prior(1.0)
    # get full prior
    prior = np.kron(sprior, tprior)
    # get penalty
    penalty = np.linalg.inv(prior)
    # solve problem directly
    XTXSigma = np.dot(Xx.T, Xx) + ridge_scale**2 * penalty
    XTY = np.dot(Xx.T, responses_train)
    betas = np.dot(np.linalg.inv(XTXSigma), XTY)
    # check solutions
    assert np.allclose(betas, weights)