Пример #1
0
def split_generator(X, Y, n_cv=5, random_seed=1234):
    """Yield ``(train_ix, test_ix)`` pairs for ``n_cv`` cross-validation rounds.

    Two modes are selected from column ``2`` of ``Y``:

    * Every entry of ``Y[2]`` is missing: a shuffled ``StratifiedKFold``
      split of ``X`` is produced, stratified on the values of ``Y[3]``.
    * Otherwise ``Y[2]`` is read as a split assignment. The values of
      ``Y[0]`` on rows marked ``'train'`` or ``'valid'`` form the training
      set and those marked ``'test'`` form the test set. The same
      partition is yielded ``n_cv`` times, each time with an independently
      shuffled copy of the training values. Rows whose ``Y[2]`` is missing
      or holds any other value are left out of both sets.

    Args:
        X: Samples handed to ``StratifiedKFold.split``; unused when a
            split assignment is present in ``Y[2]``.
        Y: pandas DataFrame with integer column labels ``0``, ``2`` and
            ``3`` (presumably sample index, split name and class label --
            confirm against the caller).
        n_cv: Number of folds / rounds to yield.
        random_seed: Seed for the shuffling in both modes, so repeated
            calls with the same arguments yield the same sequence.

    Yields:
        Tuples ``(train_ix, test_ix)`` of NumPy arrays. In the predefined
        split mode ``test_ix`` is the same array object in every round.
    """
    # ``.all()`` collapses the mask to one boolean; comparing the result of
    # ``.unique()`` to True breaks when the column mixes NaN and non-NaN.
    if Y[2].isna().all():
        labels = Y[3].values
        splitter = StratifiedKFold(n_splits=n_cv,
                                   shuffle=True,
                                   random_state=random_seed)
        yield from splitter.split(X, labels)
        return

    train_ix = Y[Y[2].isin(['train', 'valid'])][0].values
    test_ix = Y[Y[2].isin(['test'])][0].values

    # A dedicated seeded generator keeps the rounds reproducible and leaves
    # the global NumPy RNG state alone.
    rng = np.random.RandomState(random_seed)
    for _ in range(n_cv):
        # ``permutation`` returns a fresh shuffled copy, so each round owns
        # its own ordering instead of aliasing one in-place shuffled array.
        yield rng.permutation(train_ix), test_ix
Пример #2
0
def data_partition(sample_size_list, folds_num=5, seed_=42):
    """Build stratified K-fold index splits from per-class sample counts.

    Class ``i`` is assigned the label ``float(i)`` repeated
    ``sample_size_list[i]`` times, and the classes are laid out
    consecutively, so the returned indices refer to positions in that
    concatenated ordering.

    Args:
        sample_size_list: Sequence of non-negative integers, one sample
            count per class.
        folds_num: Number of folds (default 5, the previously hard-coded
            value).
        seed_: Seed for the shuffling (default 42, the previously
            hard-coded value).

    Returns:
        A list of ``folds_num`` tuples ``(train_index, test_index)`` as
        produced by ``StratifiedKFold.split``.
    """
    class_ids = np.arange(len(sample_size_list), dtype=float)
    label_all = np.repeat(class_ids, sample_size_list)

    # StratifiedKFold only needs the number of samples from X; the labels
    # alone determine the split. A 1-D zero placeholder avoids allocating a
    # (num_sum x num_sum) random matrix and touching the global NumPy RNG.
    placeholder = np.zeros(label_all.shape[0])

    splitter = StratifiedKFold(folds_num,
                               shuffle=True,
                               random_state=np.random.RandomState(seed_))
    return list(splitter.split(placeholder, label_all))