def split_generator(X, Y, n_cv=5, random_seed=1234):
    """Yield ``n_cv`` ``(train_ix, test_ix)`` pairs for cross-validation.

    ``Y`` is indexed like a pandas DataFrame with columns ``0``, ``2`` and
    ``3``. Column ``2`` is read as a split tag and decides the mode:

    * Every entry of ``Y[2]`` is NaN: ``Y[3]`` is used as the label vector
      for a shuffled ``StratifiedKFold`` over ``X``, seeded with
      ``random_seed``.
    * Otherwise: rows tagged ``'train'`` or ``'valid'`` form the training
      set and rows tagged ``'test'`` form the test set. The values of
      column ``0`` on those rows are yielded (presumably sample indices --
      confirm against the caller). The same split is yielded ``n_cv``
      times, each time with an independent, ``random_seed``-reproducible
      permutation of the training values.

    Args:
        X: Samples handed to ``StratifiedKFold.split``; not used when a
            predefined split is present.
        Y: Table providing columns ``0``, ``2`` and ``3`` as described above.
        n_cv: Number of folds / repetitions to yield.
        random_seed: Seed for the fold shuffling in both modes.

    Yields:
        Tuples ``(train_ix, test_ix)`` of arrays.
    """
    # `.all()` gives a single bool; comparing `.unique()` to True raised an
    # ambiguous-truth ValueError as soon as the column mixed NaN and tags.
    if Y[2].isna().all():
        labels = Y[3].values
        splitter = StratifiedKFold(
            n_splits=n_cv, shuffle=True, random_state=random_seed
        )
        yield from splitter.split(X, labels)
        return

    train_ix = Y[Y[2].isin({'train', 'valid'})][0].values
    test_ix = Y[Y[2].isin({'test'})][0].values

    # A local, seeded generator keeps this mode reproducible and leaves the
    # global NumPy RNG untouched.
    rng = np.random.RandomState(random_seed)
    for _ in range(n_cv):
        # permutation() returns a fresh array, so each fold keeps its own
        # ordering instead of aliasing one array that is reshuffled in place.
        yield rng.permutation(train_ix), test_ix
def data_partition(sample_size_list, folds_num=5, seed_=42):
    """Build stratified K-fold index splits from per-class sample counts.

    Class ``i`` is assigned the label ``float(i)`` repeated
    ``sample_size_list[i]`` times, with the classes laid out back to back in
    list order. A shuffled ``StratifiedKFold`` is then run over that label
    vector.

    Args:
        sample_size_list: Number of samples in each class, in class order.
        folds_num: Number of folds to generate (default 5).
        seed_: Seed for the ``RandomState`` driving the shuffle (default 42).

    Returns:
        A list of ``folds_num`` tuples ``(train_index, test_index)`` of
        integer position arrays into the concatenated label vector.
    """
    sizes = list(sample_size_list)
    label_all = np.repeat(np.arange(len(sizes), dtype=float), sizes)

    # StratifiedKFold.split only needs the number of samples from X; the
    # stratification comes from y. A zero vector avoids allocating an N x N
    # random matrix and consuming the global NumPy RNG for a placeholder.
    placeholder = np.zeros(label_all.shape[0])

    splitter = StratifiedKFold(
        folds_num, shuffle=True, random_state=np.random.RandomState(seed_)
    )
    return [
        (train_index, test_index)
        for train_index, test_index in splitter.split(placeholder, label_all)
    ]