Пример #1
0
def bootstrap_sample_pattern(rdms, pattern_descriptor='index'):
    """Draws a bootstrap_sample from the data.

    This function generates a bootstrap sample of RDMs resampled over
    patterns. By default every pattern is treated independently. If desired
    a descriptor name can be passed in pattern_descriptor to group patterns.

    Args:
        rdms(rsatoolbox.rdm.rdms.RDMs): Data to be used

        pattern_descriptors(string):
            descriptor to group the patterns by. Each group of patterns will
            be in or out of the sample as a whole

    Returns:
        rsatoolbox.rdm.rdms.RDMs: rdm_idx
            subsampled dataset with equal number of pattern groups

        numpy.ndarray: pattern_idx
            sampled pattern descriptor index values for subsampling other rdms
    """
    pattern_descriptor, pattern_select = \
        add_pattern_index(rdms, pattern_descriptor)
    pattern_idx = np.random.randint(0,
                                    len(pattern_select),
                                    size=len(pattern_select))
    pattern_idx = pattern_select[pattern_idx]
    rdms = rdms.subsample_pattern(pattern_descriptor, pattern_idx)
    return rdms, pattern_idx
Пример #2
0
def sets_of_k_pattern(rdms, pattern_descriptor=None, k=5, random=False):
    """ generates training and test set combinations by splitting into
    groups of k. This version splits in the given order or
    randomizes the order. If the number of patterns is not divisible by k
    patterns are added to the first groups such that those have k+1 patterns

    Args:
        rdms(rsatoolbox.rdm.RDMs): rdms to use
        pattern_descriptor(String): descriptor to select groups
        k(int): number of groups
        random(bool): whether the assignment shall be randomized

    Returns:
        train_set(list): list of tuples (rdms, pattern_idx)
        test_set(list): list of tuples (rdms, pattern_idx)

    """
    pattern_descriptor, pattern_select = \
        add_pattern_index(rdms, pattern_descriptor)
    assert k <= len(pattern_select) / 2, \
        'to form groups we can use at most half the patterns per group'
    n_groups = int(len(pattern_select) / k)
    return sets_k_fold_pattern(rdms,
                               pattern_descriptor=pattern_descriptor,
                               k=n_groups,
                               random=random)
Пример #3
0
def sets_leave_one_out_pattern(rdms, pattern_descriptor):
    """ generates training and test set combinations by leaving one level
    of pattern_descriptor out as a test set.
    This is only sensible if pattern_descriptor already defines larger groups!

    the ceil_train_set contains the rdms for the test-patterns from the
    training-rdms. This is required for computing the noise-ceiling

    Args:
        rdms(rsatoolbox.rdm.RDMs): rdms to use
        pattern_descriptor(String): descriptor to select groups

    Returns:
        train_set(list): list of tuples (rdms, pattern_idx)
        test_set(list): list of tuples (rdms, pattern_idx)
        ceil_set(list): list of tuples (rdms, pattern_idx)

    """
    pattern_descriptor, pattern_select = \
        add_pattern_index(rdms, pattern_descriptor)
    train_set = []
    test_set = []
    ceil_set = []
    for i_pattern in pattern_select:
        pattern_idx_train = np.setdiff1d(pattern_select, i_pattern)
        rdms_train = rdms.subset_pattern(pattern_descriptor, pattern_idx_train)
        pattern_idx_test = [i_pattern]
        rdms_test = rdms.subset_pattern(pattern_descriptor, pattern_idx_test)
        rdms_ceil = rdms.subset_pattern(pattern_descriptor, pattern_idx_test)
        train_set.append((rdms_train, pattern_idx_train))
        test_set.append((rdms_test, pattern_idx_test))
        ceil_set.append((rdms_ceil, pattern_idx_test))
    return train_set, test_set, ceil_set
Пример #4
0
def sets_k_fold_pattern(rdms,
                        pattern_descriptor='index',
                        k=None,
                        random=False):
    """ generates training and test set combinations by splitting into k
    similar sized groups. This version splits in the given order or
    randomizes the order. For k=1 training and test_set are whole dataset,
    i.e. no crossvalidation is performed.

    For only crossvalidating over patterns there is no independent training
    set for calculating a noise ceiling for the patterns.
    To express this we set ceil_set to None, which makes the crossvalidation
    function calculate a leave one rdm out noise ceiling for the right
    patterns instead.

    Args:
        rdms(rsatoolbox.rdm.RDMs): rdms to use
        pattern_descriptor(String): descriptor to select groups
        k(int): number of groups
        random(bool): whether the assignment shall be randomized

    Returns:
        train_set(list): list of tuples (rdms, pattern_idx)
        test_set(list): list of tuples (rdms, pattern_idx)
        ceil_set = None

    """
    pattern_descriptor, pattern_select = \
        add_pattern_index(rdms, pattern_descriptor)
    if k is None:
        k = default_k_pattern(len(pattern_select))
    assert k <= len(pattern_select), \
        'Can make at most as many groups as conditions'
    if random:
        np.random.shuffle(pattern_select)
    group_size = np.floor(len(pattern_select) / k)
    additional_patterns = len(pattern_select) % k
    train_set = []
    test_set = []
    for i_group in range(k):
        test_idx = np.arange(i_group * group_size, (i_group + 1) * group_size)
        if i_group < additional_patterns:
            test_idx = np.concatenate((test_idx, [-(i_group + 1)]))
        if k <= 1:
            train_idx = test_idx
        else:
            train_idx = np.setdiff1d(np.arange(len(pattern_select)), test_idx)
        pattern_idx_test = [pattern_select[int(idx)] for idx in test_idx]
        pattern_idx_train = [pattern_select[int(idx)] for idx in train_idx]
        rdms_test = rdms.subset_pattern(pattern_descriptor, pattern_idx_test)
        rdms_train = rdms.subset_pattern(pattern_descriptor, pattern_idx_train)
        test_set.append([rdms_test, pattern_idx_test])
        train_set.append([rdms_train, pattern_idx_train])
    ceil_set = None
    return train_set, test_set, ceil_set
Пример #5
0
def bootstrap_sample(rdms, rdm_descriptor='index', pattern_descriptor='index'):
    """Draws a bootstrap_sample from the data.

    This function generates a bootstrap sample of RDMs resampled over
    measurements and patterns. By default every pattern and RDM sample is
    treated independently. If desired descriptor names can be passed in
    descriptors and in pattern_descriptors to group rdms instead.

    Args:
        rdms(rsatoolbox.rdm.rdms.RDMs): Data to be used

        rdm_descriptors(String):
            descriptor to group the samples by. For each unique value of
            the descriptor each sample will either contain all RDMs with
            this value or none

        pattern_descriptors(string):
            descriptor to group the patterns by. Each group of patterns will
            be in or out of the sample as a whole

    Returns:
        rsatoolbox.rdm.rdms.RDMs: rdms
            subsampled dataset with equal number of groups in both patterns
            and measurements of the rdms

        numpy.ndarray: rdm_idx
            sampled rdm indices

        numpy.ndarray: pattern_idx
            sampled pattern descriptor indices

    """
    rdm_select = np.unique(rdms.rdm_descriptors[rdm_descriptor])
    pattern_descriptor, pattern_select = \
        add_pattern_index(rdms, pattern_descriptor)
    rdm_idx = np.random.randint(0, len(rdm_select), size=len(rdm_select))
    rdm_idx = rdm_select[rdm_idx]
    rdms = rdms.subsample(rdm_descriptor, rdm_idx)
    pattern_idx = np.random.randint(0,
                                    len(pattern_select),
                                    size=len(pattern_select))
    pattern_idx = pattern_select[pattern_idx]
    rdms = rdms.subsample_pattern(pattern_descriptor, pattern_idx)
    return rdms, rdm_idx, pattern_idx
Пример #6
0
def sets_k_fold(rdms,
                k_rdm=None,
                k_pattern=None,
                random=True,
                pattern_descriptor='index',
                rdm_descriptor='index'):
    """ generates training and test set combinations by splitting into k
    similar sized groups. This version splits both over rdms and over patterns
    resulting in k_rdm * k_pattern (training, test) pairs.

    If a k is set to 1 the corresponding dimension is not crossvalidated.

    Args:
        rdms(rsatoolbox.rdm.RDMs): rdms to use
        pattern_descriptor(String): descriptor to select pattern groups
        rdm_descriptor(String): descriptor to select rdm groups
        k_rdm(int): number of rdm groups
        k_pattern(int): number of pattern groups
        random(bool): whether the assignment shall be randomized

    Returns:
        train_set(list): list of tuples (rdms, pattern_idx)
        test_set(list): list of tuples (rdms, pattern_idx)
        ceil_set(list): list of tuples (rdms, pattern_idx)

    """
    rdm_select = rdms.rdm_descriptors[rdm_descriptor]
    rdm_select = np.unique(rdm_select)
    if k_rdm is None:
        k_rdm = default_k_rdm(len(rdm_select))
    pattern_descriptor, pattern_select = \
        add_pattern_index(rdms, pattern_descriptor)
    if k_pattern is None:
        k_pattern = default_k_pattern(len(pattern_select))
    assert k_rdm <= len(rdm_select), \
        'Can make at most as many groups as rdms'
    if random:
        np.random.shuffle(rdm_select)
    group_size_rdm = np.floor(len(rdm_select) / k_rdm)
    additional_rdms = len(rdm_select) % k_rdm
    train_set = []
    test_set = []
    ceil_set = []
    for i_group in range(k_rdm):
        test_idx = np.arange(i_group * group_size_rdm,
                             (i_group + 1) * group_size_rdm)
        if i_group < additional_rdms:
            test_idx = np.concatenate((test_idx, [-(i_group + 1)]))
        if k_rdm <= 1:
            train_idx = test_idx
        else:
            train_idx = np.setdiff1d(np.arange(len(rdm_select)), test_idx)
        rdm_idx_test = [rdm_select[int(idx)] for idx in test_idx]
        rdm_idx_train = [rdm_select[int(idx)] for idx in train_idx]
        rdms_test = rdms.subsample(rdm_descriptor, rdm_idx_test)
        rdms_train = rdms.subsample(rdm_descriptor, rdm_idx_train)
        train_new, test_new, _ = sets_k_fold_pattern(
            rdms_train,
            k=k_pattern,
            pattern_descriptor=pattern_descriptor,
            random=random)
        ceil_new = test_new.copy()
        for i_pattern in range(k_pattern):
            test_new[i_pattern][0] = rdms_test.subset_pattern(
                by=pattern_descriptor, value=test_new[i_pattern][1])
        train_set += train_new
        test_set += test_new
        ceil_set += ceil_new
    return train_set, test_set, ceil_set
Пример #7
0
def sets_random(rdms,
                n_rdm=None,
                n_pattern=None,
                n_cv=2,
                pattern_descriptor='index',
                rdm_descriptor='index'):
    """ generates training and test set combinations by selecting random
    test sets of n_rdm RDMs and n_pattern patterns and using the rest of
    the data as the training set.

    If a n is set to 0 the corresponding dimension is not crossvalidated.

    Args:
        rdms(rsatoolbox.rdm.RDMs): rdms to split
        pattern_descriptor(String): descriptor to select pattern groups
        rdm_descriptor(String): descriptor to select rdm groups
        n_rdm(int): number of rdms per test set
        n_pattern(int): number of patterns per test set

    Returns:
        train_set(list): list of tuples (rdms, pattern_idx)
        test_set(list): list of tuples (rdms, pattern_idx)
        ceil_set(list): list of tuples (rdms, pattern_idx)

    """
    rdm_select = rdms.rdm_descriptors[rdm_descriptor]
    rdm_select = np.unique(rdm_select)
    if n_rdm is None:
        k_rdm = default_k_rdm(len(rdm_select))
        n_rdm = int(np.floor(len(rdm_select) / k_rdm))
    pattern_descriptor, pattern_select = \
        add_pattern_index(rdms, pattern_descriptor)
    if n_pattern is None:
        k_pattern = default_k_pattern(len(pattern_select))
        n_pattern = int(np.floor(len(pattern_select) / k_pattern))
    train_set = []
    test_set = []
    ceil_set = []
    for _i_group in range(n_cv):
        # shuffle
        np.random.shuffle(rdm_select)
        np.random.shuffle(pattern_select)
        # choose indices based on n_rdm
        if n_rdm == 0:
            train_idx = np.arange(len(rdm_select))
            test_idx = np.arange(len(rdm_select))
        else:
            test_idx = np.arange(n_rdm)
            train_idx = np.arange(n_rdm, len(rdm_select))
        # take subset of rdms
        rdm_idx_test = [rdm_select[int(idx)] for idx in test_idx]
        rdm_idx_train = [rdm_select[int(idx)] for idx in train_idx]
        rdms_test = rdms.subsample(rdm_descriptor, rdm_idx_test)
        rdms_train = rdms.subsample(rdm_descriptor, rdm_idx_train)
        # choose indices based on n_pattern
        if n_pattern == 0:
            train_idx = np.arange(len(pattern_select))
            test_idx = np.arange(len(pattern_select))
        else:
            test_idx = np.arange(n_pattern)
            train_idx = np.arange(n_pattern, len(pattern_select))
        pattern_idx_test = [pattern_select[int(idx)] for idx in test_idx]
        pattern_idx_train = [pattern_select[int(idx)] for idx in train_idx]
        rdms_test = rdms_test.subset_pattern(pattern_descriptor,
                                             pattern_idx_test)
        rdms_ceil = rdms_train.subset_pattern(pattern_descriptor,
                                              pattern_idx_test)
        rdms_train = rdms_train.subset_pattern(pattern_descriptor,
                                               pattern_idx_train)
        test_set.append([rdms_test, pattern_idx_test])
        train_set.append([rdms_train, pattern_idx_train])
        ceil_set.append([rdms_ceil, pattern_idx_test])
    return train_set, test_set, ceil_set