def make_fixture(binary=False, balanced=False, split=False):
    """
    Build a small classification dataset for exercising ClassBalance.

    Parameters
    ----------
    binary : bool, default: False
        If True generate 2 classes, otherwise generate 5 classes.

    balanced : bool, default: False
        If True no class weights are passed (``weights=None``); otherwise
        a fixed, uneven weight list is passed to ``make_classification``.

    split : bool, default: False
        If True the data is divided with ``tts`` (20% test) and returned as
        ``Dataset(Split(X_train, X_test), Split(y_train, y_test))``;
        otherwise the unsplit ``Dataset(X, y)`` is returned.

    Returns
    -------
    dataset : Dataset
        The generated data, split or unsplit as requested.
    """
    # Pick the class count together with the uneven weights that go with it.
    if binary:
        n_classes, skewed_weights = 2, [0.3, 0.7]
    else:
        # NOTE(review): these five weights sum to 0.91 rather than 1.0;
        # confirm the trailing .01 is intentional before touching it, since
        # changing it alters the generated data.
        n_classes, skewed_weights = 5, [0.1, 0.2, 0.4, 0.2, .01]

    X, y = make_classification(
        n_samples=100,
        n_features=20,
        n_informative=8,
        n_redundant=2,
        n_clusters_per_class=1,
        random_state=89092,
        n_classes=n_classes,
        weights=None if balanced else skewed_weights,
    )

    if not split:
        return Dataset(X, y)

    # Fixed random_state keeps the train/test partition reproducible.
    X_train, X_test, y_train, y_test = tts(
        X, y, test_size=0.2, random_state=101
    )
    return Dataset(Split(X_train, X_test), Split(y_train, y_test))
def digits(request):
    """
    Attach train and test splits of the digits dataset to the test class.

    The data comes from ``load_digits`` and is divided with ``tts`` using a
    20% test size and a fixed random state. The result is stored (not
    returned) as ``request.cls.digits``: a Dataset named tuple composed of
    two Split tuples, one for the features and one for the targets.

    Parameters
    ----------
    request : object
        Must expose a ``cls`` attribute on which ``digits`` can be set
        (presumably the pytest fixture request; the decorator is not
        visible here).
    """
    bunch = load_digits()
    features_train, features_test, target_train, target_test = tts(
        bunch.data, bunch.target, test_size=0.2, random_state=11
    )

    # Expose the splits as a class attribute so test methods can read them.
    request.cls.digits = Dataset(
        Split(features_train, features_test),
        Split(target_train, target_test),
    )
def data(request):
    """
    Attach train and test splits of a generated regression dataset to the
    test class.

    The data comes from ``make_regression`` (500 samples, 22 features, 8 of
    them informative, fixed random state) and is divided with ``tts`` using
    a 20% test size. The result is stored (not returned) as
    ``request.cls.data``: a Dataset named tuple composed of two Split
    tuples, one for the features and one for the targets.

    Parameters
    ----------
    request : object
        Must expose a ``cls`` attribute on which ``data`` can be set
        (presumably the pytest fixture request; the decorator is not
        visible here).
    """
    features, target = make_regression(
        n_samples=500, n_features=22, n_informative=8, random_state=42
    )
    features_train, features_test, target_train, target_test = tts(
        features, target, test_size=0.2, random_state=11
    )

    # Expose the splits as a class attribute so test methods can read them.
    request.cls.data = Dataset(
        Split(features_train, features_test),
        Split(target_train, target_test),
    )
def multiclass(request):
    """
    Attach train and test splits of a random multiclass classification
    dataset to the test class.

    The data comes from ``make_classification`` (500 samples, 20 features,
    6 classes with 3 clusters per class, fixed random state) and is divided
    with ``tts`` using a 20% test size. The result is stored (not returned)
    as ``request.cls.multiclass``: a Dataset of two Split tuples, one for
    the features and one for the targets.

    Parameters
    ----------
    request : object
        Must expose a ``cls`` attribute on which ``multiclass`` can be set
        (presumably the pytest fixture request; the decorator is not
        visible here).
    """
    features, labels = make_classification(
        n_samples=500,
        n_features=20,
        n_informative=8,
        n_redundant=2,
        n_classes=6,
        n_clusters_per_class=3,
        random_state=87,
    )
    features_train, features_test, labels_train, labels_test = tts(
        features, labels, test_size=0.2, random_state=93
    )

    # Expose the splits as a class attribute so test methods can read them.
    request.cls.multiclass = Dataset(
        Split(features_train, features_test),
        Split(labels_train, labels_test),
    )