Exemplo n.º 1
0
def eval_fixed(models, data, theta=None, method='cosine'):
    """Evaluate models once on the full data set.

    No bootstrapping or cross-validation is performed.

    Args:
        models(pyrsa.model.Model or list of pyrsa.model.Model): models
            to be evaluated
        data(pyrsa.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models; indexed
            per model when a list of models is passed
        method(string): comparison method to use

    Returns:
        Result: created with cv_method='fixed', holding the evaluations
            and the output of boot_noise_ceiling on data

    Raises:
        ValueError: if models is neither a Model nor an iterable

    """
    evaluations, theta, _ = input_check_model(models, theta, None, 1)
    if isinstance(models, Model):
        prediction = models.predict_rdm(theta=theta)
        # single model: the first entry of the comparison output is
        # stored as is (no averaging), wrapped in two extra dimensions
        first_entry = compare(prediction, data, method)[0]
        evaluations = np.array([[first_entry]])
    elif not isinstance(models, Iterable):
        raise ValueError('models should be a pyrsa.model.Model or a list of'
                         ' such objects')
    else:
        for idx, model in enumerate(models):
            prediction = model.predict_rdm(theta=theta[idx])
            # several models: each one gets the mean of the first entry
            first_entry = compare(prediction, data, method)[0]
            evaluations[idx] = np.mean(first_entry)
        evaluations = evaluations.reshape((1, len(models)))
    noise_ceil = boot_noise_ceiling(data,
                                    method=method,
                                    rdm_descriptor='index')
    return Result(models,
                  evaluations,
                  method=method,
                  cv_method='fixed',
                  noise_ceiling=noise_ceil)
Exemplo n.º 2
0
def eval_bootstrap_rdm(models,
                       data,
                       theta=None,
                       method='cosine',
                       N=1000,
                       rdm_descriptor='index',
                       boot_noise_ceil=True):
    """Evaluate models on bootstrap samples of the RDMs.

    Draws N bootstrap samples via bootstrap_sample_rdm and stores the
    mean comparison value of each model on each sample.

    Args:
        models(pyrsa.model.Model or list of pyrsa.model.Model): models
            to be evaluated
        data(pyrsa.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models; indexed
            per model when a list of models is passed
        method(string): comparison method to use
        N(int): number of bootstrap samples
        rdm_descriptor(string): rdm_descriptor to group rdms for bootstrap
        boot_noise_ceil(bool): if True, the noise ceiling is computed on
            every bootstrap sample; otherwise it is computed once on data

    Returns:
        Result: created with cv_method='bootstrap_rdm'; evaluations are
            indexed [sample, model] (reshaped to (N, 1) for a single
            model)

    """
    evaluations, theta, _ = input_check_model(models, theta, None, N)
    single_model = isinstance(models, Model)
    ceil_min = []
    ceil_max = []
    for i_boot in tqdm.trange(N):
        # the sampled indices are not needed here, only the sample itself
        sample, _ = bootstrap_sample_rdm(data, rdm_descriptor)
        if single_model:
            prediction = models.predict_rdm(theta=theta)
            evaluations[i_boot] = np.mean(
                compare(prediction, sample, method))
        elif isinstance(models, Iterable):
            for i_model, model in enumerate(models):
                prediction = model.predict_rdm(theta=theta[i_model])
                evaluations[i_boot, i_model] = np.mean(
                    compare(prediction, sample, method))
        if boot_noise_ceil:
            sample_min, sample_max = boot_noise_ceiling(
                sample, method=method, rdm_descriptor=rdm_descriptor)
            ceil_min.append(sample_min)
            ceil_max.append(sample_max)
    if single_model:
        evaluations = evaluations.reshape((N, 1))
    if not boot_noise_ceil:
        # one noise ceiling for the whole, un-resampled data set
        noise_ceil = np.array(
            boot_noise_ceiling(data,
                               method=method,
                               rdm_descriptor=rdm_descriptor))
    else:
        # two rows: per-sample first and second noise ceiling outputs
        noise_ceil = np.array([ceil_min, ceil_max])
    return Result(models,
                  evaluations,
                  method=method,
                  cv_method='bootstrap_rdm',
                  noise_ceiling=noise_ceil)
Exemplo n.º 3
0
def bootstrap_testset_rdm(model,
                          data,
                          method='cosine',
                          fitter=None,
                          N=1000,
                          rdm_descriptor=None):
    """takes a bootstrap sample of the RDMs and evaluates on the RDMs
    not sampled
    also returns the size of each test_set to allow later weighting
    or selection if this is desired.

    Note: this writes an 'index' entry into data.pattern_descriptors
    and, when rdm_descriptor is None, also into data.rdm_descriptors.

    Args:
        model(pyrsa.model.Model): Model to be evaluated
        data(pyrsa.rdm.RDMs): RDM data to use
        method(string): comparison method to use
        fitter(function): fitting function for the model
        N(int): number of bootstrap samples
        rdm_descriptor(string): descriptor to group rdms for the
            bootstrap; None uses a freshly created 'index' descriptor

    Returns:
        numpy.ndarray: evaluations, one entry per bootstrap sample
            (nan where the sample left no RDM over for testing)
        numpy.ndarray: number of test RDMs for each bootstrap sample

    """
    evaluations, _, fitter = input_check_model(model, None, fitter, N)
    # builtin int instead of np.int: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24; the resulting dtype is the same
    n_rdm = np.zeros(N, dtype=int)
    if rdm_descriptor is None:
        data.rdm_descriptors['index'] = np.arange(data.n_rdm)
        rdm_descriptor = 'index'
    data.pattern_descriptors['index'] = np.arange(data.n_cond)
    pattern_descriptor = 'index'
    for i_sample in range(N):
        sample, rdm_idx = bootstrap_sample_rdm(data,
                                               rdm_descriptor=rdm_descriptor)
        # all patterns are used for both training and testing; only the
        # RDMs are split
        pattern_idx = np.arange(data.n_cond)
        train_set = [[sample, pattern_idx]]
        # test on the descriptor values that were not drawn in this sample
        rdm_idx_test = data.rdm_descriptors[rdm_descriptor]
        rdm_idx_test = np.setdiff1d(rdm_idx_test, rdm_idx)
        if len(rdm_idx_test) >= 1:
            rdms_test = data.subsample(rdm_descriptor, rdm_idx_test)
            test_set = [[rdms_test, pattern_idx]]
            # crossval reshapes its evaluations to
            # (1, n_models, n_folds); [:, 0] takes the first model's entry
            evaluations[i_sample] = crossval(
                model,
                data,
                train_set,
                test_set,
                method=method,
                fitter=fitter,
                pattern_descriptor=pattern_descriptor).evaluations[:, 0]
        else:
            # every RDM was drawn: nothing left to test on
            evaluations[i_sample] = np.nan
        n_rdm[i_sample] = len(rdm_idx_test)
    return evaluations, n_rdm
Exemplo n.º 4
0
def crossval(models,
             rdms,
             train_set,
             test_set,
             ceil_set=None,
             method='cosine',
             fitter=None,
             pattern_descriptor='index'):
    """Fit models on each training set and evaluate on the matching
    test set.

    Args:
        models(pyrsa.model.Model or list of pyrsa.model.Model): models
            to be evaluated
        rdms(pyrsa.rdm.RDMs): full dataset
        train_set(list): a list of the training RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        test_set(list): a list of the test RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        ceil_set(list): optional, one entry per test set; when given it
            is handed to cv_noise_ceiling, otherwise the noise ceiling
            is computed per fold with boot_noise_ceiling
        method(string): comparison method to use
        fitter(function): fitting function; for a single model None
            falls back to models.default_fitter, for a list of models
            it is passed through input_check_model
        pattern_descriptor(string): descriptor to group patterns

    Returns:
        Result: created with cv_method='crossvalidation'; evaluations
            are reshaped to (1, number of models, number of folds)

    """
    assert len(train_set) == len(test_set), \
        'train_set and test_set must have the same length'
    if ceil_set is not None:
        assert len(ceil_set) == len(test_set), \
            'ceil_set and test_set must have the same length'

    def _score_fold(model, fit, train, test):
        # fit on the training RDMs, predict, restrict the prediction to
        # the test patterns and average the comparison with the test RDMs
        theta = fit(model,
                    train[0],
                    method=method,
                    pattern_idx=train[1],
                    pattern_descriptor=pattern_descriptor)
        pred = model.predict_rdm(theta)
        pred = pred.subsample_pattern(by=pattern_descriptor, value=test[1])
        return np.mean(compare(pred, test[0], method))

    fold_evals = []
    fold_ceils = []
    for train, test in zip(train_set, test_set):
        # folds without RDMs or with at most two conditions are not
        # evaluated and yield nan
        degenerate = (train[0].n_rdm == 0 or test[0].n_rdm == 0
                      or train[0].n_cond <= 2 or test[0].n_cond <= 2)
        if not degenerate:
            if isinstance(models, Model):
                if fitter is None:
                    fitter = models.default_fitter
                evals = _score_fold(models, fitter, train, test)
            elif isinstance(models, Iterable):
                # called per fold so that every fold gets its own
                # evaluation array
                evals, _, fitter = input_check_model(models, None, fitter)
                for i_model, model in enumerate(models):
                    evals[i_model] = _score_fold(model, fitter[i_model],
                                                 train, test)
            if ceil_set is None:
                # NOTE(review): only reached for non-degenerate folds, so
                # the noise ceiling can have fewer entries than there are
                # folds -- confirm this is intended
                test_rdms = rdms.subsample_pattern(by=pattern_descriptor,
                                                   value=test[1])
                fold_ceils.append(boot_noise_ceiling(test_rdms,
                                                     method=method))
        else:
            if isinstance(models, Model):
                evals = np.nan
            elif isinstance(models, Iterable):
                evals = np.full(len(models), np.nan)
        fold_evals.append(evals)
    if isinstance(models, Model):
        models = [models]
    # transpose to switch the models/set order before adding the
    # leading dimension
    evaluations = np.array(fold_evals).T.reshape(
        (1, len(models), len(train_set)))
    if ceil_set is None:
        noise_ceil = np.array(fold_ceils).T
    else:
        noise_ceil = cv_noise_ceiling(rdms,
                                      ceil_set,
                                      test_set,
                                      method=method,
                                      pattern_descriptor=pattern_descriptor)
    return Result(models,
                  evaluations,
                  method=method,
                  cv_method='crossvalidation',
                  noise_ceiling=noise_ceil)