Example No. 1
def eval_fixed(models, data, theta=None, method='cosine'):
    """evaluates models on data, without any bootstrapping or
    cross-validation

    Args:
        models(pyrsa.model.Model or list of pyrsa.model.Model): models to be evaluated
        data(pyrsa.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models
        method(string): comparison method to use

    Returns:
        pyrsa.inference.Result: evaluation results

    """
    evaluations, theta, _ = input_check_model(models, theta, None, 1)
    if isinstance(models, Model):
        rdm_pred = models.predict_rdm(theta=theta)
        evaluations = np.array([[compare(rdm_pred, data, method)[0]]])
    elif isinstance(models, Iterable):
        for k in range(len(models)):
            rdm_pred = models[k].predict_rdm(theta=theta[k])
            evaluations[k] = np.mean(compare(rdm_pred, data, method)[0])
        evaluations = evaluations.reshape((1, len(models)))
    else:
        raise ValueError('models should be a pyrsa.model.Model or a list of' +
                         ' such objects')
    noise_ceil = boot_noise_ceiling(data,
                                    method=method,
                                    rdm_descriptor='index')
    result = Result(models,
                    evaluations,
                    method=method,
                    cv_method='fixed',
                    noise_ceiling=noise_ceil)
    return result
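
A minimal usage sketch calling the function defined above (the random RDMs and the use of pyrsa's ModelFixed are illustrative assumptions, not part of the example):

import numpy as np
import pyrsa

rng = np.random.default_rng(0)
# 5 subject RDMs over 5 conditions (5 * 4 / 2 = 10 dissimilarities each)
data = pyrsa.rdm.RDMs(rng.random((5, 10)))
# fixed model built from the first subject's RDM (arbitrary choice)
model = pyrsa.model.ModelFixed('demo', data.get_vectors()[0])
result = eval_fixed(model, data, method='cosine')
print(result)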
Example No. 2
def boot_noise_ceiling(rdms, method='cosine', rdm_descriptor='index'):
    """ calculates a noise ceiling by leave one out & full set

    Args:
        rdms(pyrsa.rdm.RDMs): data to calculate noise ceiling
        method(string): comparison method to use
        rdm_descriptor(string): descriptor to group rdms

    Returns:
        tuple: (lower nc-bound, upper nc-bound)

    """
    _, test_set, ceil_set = sets_leave_one_out_rdm(rdms, rdm_descriptor)
    pred_test = pool_rdm(rdms, method=method)
    noise_min = []
    noise_max = []
    for i in range(len(ceil_set)):
        train = ceil_set[i]
        test = test_set[i]
        pred_train = pool_rdm(train[0], method=method)
        noise_min.append(np.mean(compare(pred_train, test[0], method)))
        noise_max.append(np.mean(compare(pred_test, test[0], method)))
    noise_min = np.mean(np.array(noise_min))
    noise_max = np.mean(np.array(noise_max))
    return noise_min, noise_max
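
A hedged usage sketch for boot_noise_ceiling; the data are random and serve only to show the call (pyrsa adds the 'index' descriptor to RDMs automatically):

import numpy as np
import pyrsa

rng = np.random.default_rng(1)
rdms = pyrsa.rdm.RDMs(rng.random((8, 15)))  # 8 RDMs over 6 conditions
lower, upper = boot_noise_ceiling(rdms, method='cosine',
                                  rdm_descriptor='index')
print('noise ceiling bounds: [%.3f, %.3f]' % (lower, upper))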
Example No. 3
def eval_bootstrap_rdm(models,
                       data,
                       theta=None,
                       method='cosine',
                       N=1000,
                       rdm_descriptor='index',
                       boot_noise_ceil=True):
    """evaluates models on data
    performs bootstrapping to get a sampling distribution

    Args:
        models(pyrsa.model.Model or list of pyrsa.model.Model): models to be evaluated
        data(pyrsa.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models
        method(string): comparison method to use
        N(int): number of samples
        rdm_descriptor(string): rdm_descriptor to group rdms for bootstrap
        boot_noise_ceil(bool): whether to recompute the noise ceiling
            on each bootstrap sample

    Returns:
        pyrsa.inference.Result: evaluation results over bootstrap samples

    """
    evaluations, theta, _ = input_check_model(models, theta, None, N)
    noise_min = []
    noise_max = []
    for i in tqdm.trange(N):
        sample, rdm_idx = bootstrap_sample_rdm(data, rdm_descriptor)
        if isinstance(models, Model):
            rdm_pred = models.predict_rdm(theta=theta)
            evaluations[i] = np.mean(compare(rdm_pred, sample, method))
        elif isinstance(models, Iterable):
            for j, mod in enumerate(models):
                rdm_pred = mod.predict_rdm(theta=theta[j])
                evaluations[i, j] = np.mean(compare(rdm_pred, sample, method))
        if boot_noise_ceil:
            noise_min_sample, noise_max_sample = boot_noise_ceiling(
                sample, method=method, rdm_descriptor=rdm_descriptor)
            noise_min.append(noise_min_sample)
            noise_max.append(noise_max_sample)
    if isinstance(models, Model):
        evaluations = evaluations.reshape((N, 1))
    if boot_noise_ceil:
        noise_ceil = np.array([noise_min, noise_max])
    else:
        noise_ceil = np.array(
            boot_noise_ceiling(data,
                               method=method,
                               rdm_descriptor=rdm_descriptor))
    result = Result(models,
                    evaluations,
                    method=method,
                    cv_method='bootstrap_rdm',
                    noise_ceiling=noise_ceil)
    return result
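
A sketch of the bootstrap evaluation; N is kept small here only so the call runs quickly, and the data and model are illustrative assumptions:

import numpy as np
import pyrsa

rng = np.random.default_rng(2)
data = pyrsa.rdm.RDMs(rng.random((8, 10)))  # 8 subject RDMs
model = pyrsa.model.ModelFixed('demo', data.get_vectors()[0])
result = eval_bootstrap_rdm(model, data, method='cosine', N=100)
print(result)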
Example No. 4
def fit_select(model,
               data,
               method='cosine',
               pattern_idx=None,
               pattern_descriptor=None):
    """ fits selection models by evaluating each rdm and selcting the one
    with best performance. Works only for ModelSelect

    Args:
        model(pyrsa.model.Model): model to be fit
        data(pyrsa.rdm.RDMs): Data to fit to
        method(String): Evaluation method
        pattern_idx(numpy.ndarray): Which patterns are sampled
        pattern_descriptor(String): Which descriptor is used

    Returns:
        theta(int): index of the best-performing RDM

    """
    evaluations = np.zeros(model.n_rdm)
    for i_rdm in range(model.n_rdm):
        pred = model.predict_rdm(i_rdm)
        if not (pattern_idx is None or pattern_descriptor is None):
            pred = pred.subsample_pattern(pattern_descriptor, pattern_idx)
        evaluations[i_rdm] = np.mean(compare(pred, data, method=method))
    theta = np.argmax(evaluations)
    return theta
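
A sketch of selection-model fitting, assuming pyrsa.model.ModelSelect wraps a set of candidate RDMs as in the toolbox; the data are random for illustration:

import numpy as np
import pyrsa

rng = np.random.default_rng(3)
data = pyrsa.rdm.RDMs(rng.random((4, 10)))
candidates = pyrsa.rdm.RDMs(rng.random((3, 10)))  # 3 candidate model RDMs
model = pyrsa.model.ModelSelect('select-demo', candidates)
best = fit_select(model, data, method='cosine')
print('selected RDM index:', best)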
Example No. 5
def _loss(theta,
          model,
          data,
          method='cosine',
          cov=None,
          pattern_descriptor=None,
          pattern_idx=None):
    """Method for calculating a loss for a model and parameter combination

    Args:
        theta(numpy.ndarray): evaluated parameter value
        model(Model): the model to be fit
        data(pyrsa.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric. The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional):
            sampled patterns. The default is None.
        pattern_descriptor(String, optional):
            descriptor used for fitting. The default is None.
        cov(numpy.ndarray, optional):
            Covariance matrix for likelihood-based evaluation.
            It is ignored otherwise. The default is None.

    Returns:

        float: loss (negative mean comparison value)

    """
    pred = model.predict_rdm(theta)
    if not (pattern_idx is None or pattern_descriptor is None):
        pred = pred.subsample_pattern(pattern_descriptor, pattern_idx)
    return -np.mean(compare(pred, data, method=method))
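
A sketch of how an optimization-based fitter can use _loss: minimize it over theta with scipy; ModelWeighted and the starting point are illustrative assumptions:

import numpy as np
import scipy.optimize
import pyrsa

rng = np.random.default_rng(4)
data = pyrsa.rdm.RDMs(rng.random((4, 10)))
basis = pyrsa.rdm.RDMs(rng.random((2, 10)))  # 2 component RDMs
model = pyrsa.model.ModelWeighted('weighted-demo', basis)
theta0 = np.ones(2) / 2  # arbitrary starting weights
opt = scipy.optimize.minimize(_loss, theta0, args=(model, data, 'cosine'))
print('fitted theta:', opt.x)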
Example No. 6
def cv_noise_ceiling(rdms, ceil_set, test_set, method='cosine',
                     pattern_descriptor='index'):
    """ calculates the noise ceiling for crossvalidation.
    The upper bound is calculated by pooling all rdms for the appropriate
    patterns in the testsets.
    the lower bound is calculated by using only the appropriate rdms
    from ceil_set for training.

    Args:
        rdms(pyrsa.rdm.RDMs): complete data
        ceil_set(list): a list of the training RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        test_set(list): a list of the test RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        method(string): comparison method to use
        pattern_descriptor(string): descriptor to group patterns

    Returns:
        tuple: (lower nc-bound, upper nc-bound)

    """
    assert len(ceil_set) == len(test_set), \
        'train_set and test_set must have the same length'
    noise_min = []
    noise_max = []
    for i in range(len(ceil_set)):
        train = ceil_set[i]
        test = test_set[i]
        pred_train = pool_rdm(train[0], method=method)
        pred_train = pred_train.subsample_pattern(by=pattern_descriptor,
                                                  value=test[1])
        pred_test = pool_rdm(rdms, method=method)
        pred_test = pred_test.subsample_pattern(by=pattern_descriptor,
                                                value=test[1])
        noise_min.append(np.mean(compare(pred_train, test[0], method)))
        noise_max.append(np.mean(compare(pred_test, test[0], method)))
    noise_min = np.mean(np.array(noise_min))
    noise_max = np.mean(np.array(noise_max))
    return noise_min, noise_max
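
A sketch using sets_leave_one_out_rdm (seen in Example No. 2, assumed to be in scope) to build matching ceil_set and test_set; the (RDMs, pattern_idx) layout of the set entries is assumed to match the docstring above:

import numpy as np
import pyrsa

rng = np.random.default_rng(5)
rdms = pyrsa.rdm.RDMs(rng.random((6, 10)))
train_set, test_set, ceil_set = sets_leave_one_out_rdm(rdms, 'index')
lower, upper = cv_noise_ceiling(rdms, ceil_set, test_set, method='cosine')
print('cv noise ceiling: [%.3f, %.3f]' % (lower, upper))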
Example No. 7
    def test_temporal_rsa(self):
        import numpy as np
        import matplotlib.pyplot as plt
        import pyrsa
        import pickle
        from pyrsa.rdm import calc_rdm_movie

        import os
        path = os.path.dirname(os.path.abspath(__file__))
        dat = pickle.load(
            open(
                os.path.join(path, '..', 'demos', "TemporalSampleData",
                             "meg_sample_data.pkl"), "rb"))
        measurements = dat['data']
        cond_names = list(dat['cond_names'].keys())
        cond_idx = dat['cond_idx']
        channel_names = dat['channel_names']
        times = dat['times']
        print(
            'there are %d observations (trials), %d channels, and %d time-points\n'
            % (measurements.shape))
        print('conditions:')
        print(cond_names)

        fig, ax = plt.subplots(1, 2, figsize=(12, 4))
        ax = ax.flatten()
        for jj, chan in enumerate(channel_names[:2]):
            for ii, cond_ii in enumerate(np.unique(cond_idx)):
                mn = measurements[cond_ii == cond_idx, jj, :].mean(0).squeeze()
                ax[jj].plot(times, mn, label=cond_names[ii])
                ax[jj].set_title(chan)
        ax[jj].legend()
        tim_des = {'time': times}
        des = {'session': 0, 'subj': 0}
        obs_des = {'conds': cond_idx}
        chn_des = {'channels': channel_names}
        data = pyrsa.data.TemporalDataset(measurements,
                                          descriptors=des,
                                          obs_descriptors=obs_des,
                                          channel_descriptors=chn_des,
                                          time_descriptors=tim_des)
        data.sort_by('conds')
        print('shape of original measurements')
        print(data.measurements.shape)
        data_split_time = data.split_time('time')
        print('\nafter splitting')
        print(len(data_split_time))
        print(data_split_time[0].measurements.shape)
        print('shape of original measurements')
        print(data.measurements.shape)
        data_subset_time = data.subset_time('time', t_from=-.1, t_to=.5)
        print('\nafter subsetting')
        print(data_subset_time.measurements.shape)
        print(data_subset_time.time_descriptors['time'][0])
        bins = np.reshape(tim_des['time'], [-1, 2])
        print(len(bins))
        print(bins[0])
        print('shape of original measurements')
        print(data.measurements.shape)
        data_binned = data.bin_time('time', bins=bins)
        print('\nafter binning')
        print(data_binned.measurements.shape)
        print(data_binned.time_descriptors['time'][0])
        print('shape of original measurements')
        print(data.measurements.shape)
        data_dataset = data.convert_to_dataset('time')
        print('\nafter converting to dataset')
        print(data_dataset.measurements.shape)
        print(data_dataset.obs_descriptors['time'][0])
        rdms_data = calc_rdm_movie(data,
                                   method='euclidean',
                                   descriptor='conds')
        print(rdms_data)
        rdms_data_binned = calc_rdm_movie(data,
                                          method='euclidean',
                                          descriptor='conds',
                                          bins=bins)
        print(rdms_data_binned)
        plt.figure(figsize=(10, 15))
        # add formatted time as rdm_descriptor
        rdms_data_binned.rdm_descriptors['time_formatted'] = [
            '%0.0f ms' % (np.round(x * 1000, 2))
            for x in rdms_data_binned.rdm_descriptors['time']
        ]

        pyrsa.vis.show_rdm(rdms_data_binned,
                           do_rank_transform=False,
                           pattern_descriptor='conds',
                           rdm_descriptor='time_formatted')
        from pyrsa.rdm import get_categorical_rdm
        rdms_model_in = get_categorical_rdm(['%d' % x for x in range(4)])
        rdms_model_lr = get_categorical_rdm(['l', 'r', 'l', 'r'])
        rdms_model_av = get_categorical_rdm(['a', 'a', 'v', 'v'])
        model_names = ['independent', 'left/right', 'audio/visual']
        # append in one RDMs object
        model_rdms = rdms_model_in
        model_rdms.append(rdms_model_lr)
        model_rdms.append(rdms_model_av)
        model_rdms.rdm_descriptors['model_names'] = model_names
        model_rdms.pattern_descriptors['cond_names'] = cond_names
        plt.figure(figsize=(10, 10))
        pyrsa.vis.show_rdm(model_rdms,
                           rdm_descriptor='model_names',
                           pattern_descriptor='cond_names')
        from pyrsa.rdm import compare
        r = []
        for mod in model_rdms:
            r.append(compare(mod, rdms_data_binned, method='cosine'))
        for i, r_ in enumerate(r):
            plt.plot(rdms_data_binned.rdm_descriptors['time'],
                     r_.squeeze(),
                     label=model_names[i])
        plt.xlabel('time')
        plt.ylabel('model-data cosine similarity')
        plt.legend()
Example No. 8
def crossval(models,
             rdms,
             train_set,
             test_set,
             ceil_set=None,
             method='cosine',
             fitter=None,
             pattern_descriptor='index'):
    """evaluates models on cross-validation sets

    Args:
        models(pyrsa.model.Model or list of pyrsa.model.Model): models to be evaluated
        rdms(pyrsa.rdm.RDMs): full dataset
        train_set(list): a list of the training RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        test_set(list): a list of the test RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        ceil_set(list): an optional list of RDMs for the noise ceiling,
            with 2-tuple entries: (RDMs, pattern_idx)
        method(string): comparison method to use
        fitter(function): fitting function for the models; defaults to
            each model's default_fitter
        pattern_descriptor(string): descriptor to group patterns

    Returns:
        pyrsa.inference.Result: evaluation results

    """
    assert len(train_set) == len(test_set), \
        'train_set and test_set must have the same length'
    if ceil_set is not None:
        assert len(ceil_set) == len(test_set), \
            'ceil_set and test_set must have the same length'
    evaluations = []
    noise_ceil = []
    for i in range(len(train_set)):
        train = train_set[i]
        test = test_set[i]
        if (train[0].n_rdm == 0 or test[0].n_rdm == 0 or train[0].n_cond <= 2
                or test[0].n_cond <= 2):
            if isinstance(models, Model):
                evals = np.nan
            elif isinstance(models, Iterable):
                evals = np.empty(len(models)) * np.nan
        else:
            if isinstance(models, Model):
                if fitter is None:
                    fitter = models.default_fitter
                theta = fitter(models,
                               train[0],
                               method=method,
                               pattern_idx=train[1],
                               pattern_descriptor=pattern_descriptor)
                pred = models.predict_rdm(theta)
                pred = pred.subsample_pattern(by=pattern_descriptor,
                                              value=test[1])
                evals = np.mean(compare(pred, test[0], method))
            elif isinstance(models, Iterable):
                evals, _, fitter = input_check_model(models, None, fitter)
                for j in range(len(models)):
                    theta = fitter[j](models[j],
                                      train[0],
                                      method=method,
                                      pattern_idx=train[1],
                                      pattern_descriptor=pattern_descriptor)
                    pred = models[j].predict_rdm(theta)
                    pred = pred.subsample_pattern(by=pattern_descriptor,
                                                  value=test[1])
                    evals[j] = np.mean(compare(pred, test[0], method))
            if ceil_set is None:
                noise_ceil.append(
                    boot_noise_ceiling(rdms.subsample_pattern(
                        by=pattern_descriptor, value=test[1]),
                                       method=method))
        evaluations.append(evals)
    if isinstance(models, Model):
        models = [models]
    evaluations = np.array(evaluations).T  # .T to switch models/set order
    evaluations = evaluations.reshape((1, len(models), len(train_set)))
    if ceil_set is not None:
        noise_ceil = cv_noise_ceiling(rdms,
                                      ceil_set,
                                      test_set,
                                      method=method,
                                      pattern_descriptor=pattern_descriptor)
    else:
        noise_ceil = np.array(noise_ceil).T
    result = Result(models,
                    evaluations,
                    method=method,
                    cv_method='crossvalidation',
                    noise_ceiling=noise_ceil)
    return result
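
A sketch of leave-one-RDM-out cross-validation of a fixed model, reusing sets_leave_one_out_rdm from Example No. 2; the random data and the reliance on the model's default fitter are illustrative assumptions:

import numpy as np
import pyrsa

rng = np.random.default_rng(6)
rdms = pyrsa.rdm.RDMs(rng.random((6, 10)))
model = pyrsa.model.ModelFixed('demo', rdms.get_vectors()[0])
train_set, test_set, ceil_set = sets_leave_one_out_rdm(rdms, 'index')
result = crossval(model, rdms, train_set, test_set,
                  ceil_set=ceil_set, method='cosine')
print(result)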
Example No. 9
    # Fragment of a per-brain-area loop: assumes `area_data_ann` (a list of
    # torch tensors), `data_ann` (a preallocated numpy array), `rdm_area`,
    # `brain_area_rsa`, `ar`, and `model` are defined earlier in the script,
    # and that `pickle`, `torch`, and pyrsa's `data` and `rdm` modules are
    # imported.
    #data_burst = np.empty([len(area_data_burstprop),list(torch.flatten(area_data_burstprop[0]).shape)[0]])
    for i in range(len(area_data_ann)):
        area_data_ann[i] = area_data_ann[i].cpu()
        #area_data_hebb[i] = area_data_hebb[i].cpu()
        #area_data_burstprop[i] = area_data_burstprop[i].cpu()
        data_ann[i, :] = torch.flatten(area_data_ann[i]).detach().numpy()
        #data_hebb[i,:] = torch.flatten(area_data_hebb[i]).detach().numpy()
        #data_burst[i,:] = torch.flatten(area_data_burstprop[i]).detach().numpy()
    dataset_ann = data.dataset.Dataset(data_ann)
    rdm_ann = rdm.calc.calc_rdm(dataset_ann)
    #dataset_hebb = data.dataset.Dataset(data_hebb)
    #rdm_hebb = rdm.calc.calc_rdm(dataset_hebb)
    #dataset_burst = data.dataset.Dataset(data_burst)
    #rdm_burst = rdm.calc.calc_rdm(dataset_burst)

    similarity_ann_area = rdm.compare(rdm_area, rdm_ann)
    #similarity_hebb_area = rdm.compare(rdm_area, rdm_hebb)
    #similarity_burst_area = rdm.compare(rdm_area, rdm_burst)

    rsa = [rdm_ann, similarity_ann_area]
    #rsa_hebb = [rdm_hebb, similarity_hebb_area]
    #rsa_burst = [rdm_burst, similarity_burst_area]
    #rsa_all = [rsa_ann, rsa_hebb, rsa_burst]

    brain_area_rsa.update({ar: rsa})

with open("./" + model + "/final_model_data/" + model + "_brain_area_rsa.pkl",
          "wb") as a_file:
    pickle.dump(brain_area_rsa, a_file)
print('Completed RSA for model: ' + model)