Пример #1
0
def test_fit_transform_auto_early_stop():
    """Check ``fit_transform`` with ``ratio='auto'`` and ``n_max_subset=4``.

    The three values returned by ``fit_transform`` are compared, entry by
    entry, against the reference arrays stored in the ``data`` directory
    located next to this file.
    """

    # Build the sampler with an explicit cap on the number of subsets
    sampler = BalanceCascade(ratio='auto',
                             random_state=RND_SEED,
                             return_indices=True,
                             n_max_subset=4)

    # With ``return_indices=True`` three values are unpacked
    X_resampled, y_resampled, idx_under = sampler.fit_transform(X, Y)

    # Load the stored reference results
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_gt = np.load(os.path.join(data_dir, 'bc_x_n_sub.npy'))
    y_gt = np.load(os.path.join(data_dir, 'bc_y_n_sub.npy'))
    idx_gt = np.load(os.path.join(data_dir, 'bc_idx_n_sub.npy'))

    # NOTE(review): ``size`` is the total number of elements of ``X_gt``; it
    # equals the number of subsets only if the reference is a 1-D array of
    # subsets -- confirm against the stored file.
    for subset in range(X_gt.size):
        assert_array_equal(X_resampled[subset], X_gt[subset])
        assert_array_equal(y_resampled[subset], y_gt[subset])
        assert_array_equal(idx_under[subset], idx_gt[subset])
Пример #2
0
def test_fit_transform_auto_gradient_boosting():
    """Check ``fit_transform`` with ``ratio='auto'`` and the
    ``'gradient-boosting'`` classifier option.

    The three values returned by ``fit_transform`` are compared, entry by
    entry, against the reference arrays stored in the ``data`` directory
    located next to this file.
    """

    # Build the sampler, selecting the classifier by its string name
    sampler = BalanceCascade(ratio='auto',
                             random_state=RND_SEED,
                             return_indices=True,
                             classifier='gradient-boosting')

    # With ``return_indices=True`` three values are unpacked
    X_resampled, y_resampled, idx_under = sampler.fit_transform(X, Y)

    # Load the stored reference results
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_gt = np.load(os.path.join(data_dir, 'bc_x_gb.npy'))
    y_gt = np.load(os.path.join(data_dir, 'bc_y_gb.npy'))
    idx_gt = np.load(os.path.join(data_dir, 'bc_idx_gb.npy'))

    # NOTE(review): ``size`` is the total number of elements of ``X_gt``; it
    # equals the number of subsets only if the reference is a 1-D array of
    # subsets -- confirm against the stored file.
    for subset in range(X_gt.size):
        assert_array_equal(X_resampled[subset], X_gt[subset])
        assert_array_equal(y_resampled[subset], y_gt[subset])
        assert_array_equal(idx_under[subset], idx_gt[subset])
Пример #3
0
def test_bc_fit():
    """Check the class statistics stored on the sampler by ``fit``."""

    # Build the sampler with the automatic ratio and fit it on the data
    sampler = BalanceCascade(ratio='auto', random_state=RND_SEED)
    sampler.fit(X, Y)

    # After fitting, the minority/majority labels must be set ...
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)
    # ... together with the per-class sample counts
    for label, count in ((0, 500), (1, 4500)):
        assert_equal(sampler.stats_c_[label], count)
def test_bc_fit():
    """Verify that ``fit`` fills in the class information attributes."""

    # Sampler configured with the automatic balancing ratio
    cascade = BalanceCascade(ratio='auto', random_state=RND_SEED)

    # Fitting is expected to populate min_c_, maj_c_ and stats_c_
    cascade.fit(X, Y)

    # Expected (attribute value, reference) pairs, checked in order
    assert_equal(cascade.min_c_, 0)
    assert_equal(cascade.maj_c_, 1)
    class_counts = {0: 500, 1: 4500}
    assert_equal(cascade.stats_c_[0], class_counts[0])
    assert_equal(cascade.stats_c_[1], class_counts[1])
Пример #5
0
def test_bc_fit_invalid_ratio():
    """Check that ``fit`` raises ``RuntimeError`` when the requested
    balancing ratio is smaller than the one of the data."""

    # A ratio of 1/10000 is used as the deliberately too-small value
    sampler = BalanceCascade(ratio=1. / 10000., random_state=RND_SEED)

    # Fitting must fail with a RuntimeError
    with assert_raises(RuntimeError):
        sampler.fit(X, Y)
Пример #6
0
def test_fit_transform_half():
    """Check ``fit_transform`` when the ratio is set to 0.5.

    The resampled ``X`` and ``y`` are compared, entry by entry, against the
    reference arrays stored in the ``data`` directory next to this file.
    """

    # Build the sampler with a fixed ratio of 0.5
    sampler = BalanceCascade(ratio=0.5, random_state=RND_SEED)

    # Without ``return_indices`` only two values are unpacked
    X_resampled, y_resampled = sampler.fit_transform(X, Y)

    # Load the stored reference results
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_gt = np.load(os.path.join(data_dir, 'bc_x_05.npy'))
    y_gt = np.load(os.path.join(data_dir, 'bc_y_05.npy'))

    # NOTE(review): ``size`` is the total number of elements of ``X_gt``; it
    # equals the number of subsets only if the reference is a 1-D array of
    # subsets -- confirm against the stored file.
    for subset in range(X_gt.size):
        assert_array_equal(X_resampled[subset], X_gt[subset])
        assert_array_equal(y_resampled[subset], y_gt[subset])
Пример #7
0
def test_transform_wt_fit():
    """Check that calling ``transform`` on an unfitted sampler raises
    ``RuntimeError``."""

    # Build the sampler but deliberately skip the call to ``fit``
    sampler = BalanceCascade(ratio='auto', random_state=RND_SEED)

    # Transforming right away must fail with a RuntimeError
    with assert_raises(RuntimeError):
        sampler.transform(X, Y)
def test_fit_transform_half():
    """Compare the output of ``fit_transform`` for ``ratio=0.5`` with the
    stored reference arrays ``bc_x_05.npy`` and ``bc_y_05.npy``."""

    # Sampler using a fixed ratio of 0.5
    cascade = BalanceCascade(ratio=0.5, random_state=RND_SEED)

    # Resample; two values are returned in this configuration
    X_resampled, y_resampled = cascade.fit_transform(X, Y)

    # The reference files live in the ``data`` folder beside this module
    here = os.path.dirname(os.path.abspath(__file__))
    reference_dir = os.path.join(here, 'data')
    X_gt = np.load(os.path.join(reference_dir, 'bc_x_05.npy'))
    y_gt = np.load(os.path.join(reference_dir, 'bc_y_05.npy'))

    # NOTE(review): ``X_gt.size`` counts every element of the array, so the
    # loop bound matches the number of subsets only for a 1-D reference
    # array -- confirm against the stored file.
    for pos in range(X_gt.size):
        assert_array_equal(X_resampled[pos], X_gt[pos])
        assert_array_equal(y_resampled[pos], y_gt[pos])
Пример #9
0
def test_bc_fit_single_class():
    """Check that ``fit`` raises ``RuntimeError`` when the target contains
    a single class."""

    # Build the sampler with the automatic ratio
    sampler = BalanceCascade(ratio='auto', random_state=RND_SEED)

    # Build an invalid target: one zero label per sample of X
    y_single_class = np.zeros(X.shape[0])

    # Fitting on a single-class target must fail with a RuntimeError
    with assert_raises(RuntimeError):
        sampler.fit(X, y_single_class)
def test_fit_transform_auto():
    """Check ``fit_transform`` with ``ratio='auto'``.

    The three values returned by ``fit_transform`` are compared, entry by
    entry, against the reference arrays stored in the ``data`` directory
    located next to this file.
    """

    # Build the sampler, asking for the indices to be returned as well
    sampler = BalanceCascade(ratio='auto',
                             random_state=RND_SEED,
                             return_indices=True)

    # With ``return_indices=True`` three values are unpacked
    X_resampled, y_resampled, idx_under = sampler.fit_transform(X, Y)

    # Load the stored reference results
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_gt = np.load(os.path.join(data_dir, 'bc_x.npy'))
    y_gt = np.load(os.path.join(data_dir, 'bc_y.npy'))
    idx_gt = np.load(os.path.join(data_dir, 'bc_idx.npy'))

    # NOTE(review): ``size`` is the total number of elements of ``X_gt``; it
    # equals the number of subsets only if the reference is a 1-D array of
    # subsets -- confirm against the stored file.
    for subset in range(X_gt.size):
        assert_array_equal(X_resampled[subset], X_gt[subset])
        assert_array_equal(y_resampled[subset], y_gt[subset])
        assert_array_equal(idx_under[subset], idx_gt[subset])
Пример #11
0
def test_bc_init():
    """Check the attributes of a freshly constructed sampler."""

    # Constructor arguments that are checked again below
    ratio = 1.
    verbose = True
    sampler = BalanceCascade(ratio=ratio, random_state=RND_SEED,
                             verbose=verbose)

    # Attribute name -> expected value right after construction, listed in
    # the order in which they are verified
    expected_attributes = (
        ('ratio_', ratio),
        ('bootstrap', True),
        ('n_max_subset', None),
        ('rs_', RND_SEED),
        ('verbose', verbose),
        ('min_c_', None),
        ('maj_c_', None),
        ('stats_c_', {}),
    )
    for attribute, expected in expected_attributes:
        assert_equal(getattr(sampler, attribute), expected)
from unbalanced_dataset.ensemble import BalanceCascade

# Generate a two-class dataset of 5000 samples and 20 features, with class
# weights [0.1, 0.9] and a fixed random state for reproducibility
X, y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=20, n_clusters_per_class=1,
                           n_samples=5000, random_state=10)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit the PCA on X and project it into a 2D feature space for plotting
X_vis = pca.fit_transform(X)

# Apply the BalanceCascade resampling with its default parameters
bc = BalanceCascade()
X_resampled, y_resampled = bc.fit_transform(X, y)
# X_resampled is iterated as a collection of subsets; each one is projected
# with the PCA fitted above so it can be drawn in the same 2D space
X_res_vis = []
for X_res in X_resampled:
    X_res_vis.append(pca.transform(X_res))

# Two subplots side by side, unpack the axes array immediately
f, (ax1, ax2) = plt.subplots(1, 2)

# Left panel: the original data, one scatter call per class label
ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0", alpha=0.5,
            edgecolor=almost_black, facecolor=palette[0], linewidth=0.15)
ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1", alpha=0.5,
            edgecolor=almost_black, facecolor=palette[2], linewidth=0.15)
ax1.set_title('Original set')

ax2.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0", alpha=0.5,