Пример #1
0
def ezensemble(X_train, y_train):
    """Under-sample the training data with EasyEnsemble and return one subset.

    Args:
        X_train: Training features. Assumes a pandas DataFrame (or another
            object whose iteration yields the column names), since
            ``list(X_train)`` is reused as the output column labels.
        y_train: Training targets aligned with ``X_train``.

    Returns:
        A ``(X_resampled, y_resampled)`` tuple of DataFrames built from the
        subset at index 1 of the 10 subsets requested from the sampler. The
        target frame has a single column named ``'Target'``.
    """
    feature_names = list(X_train)
    ee = EasyEnsemble(random_state=0, n_subsets=10)
    # fit_sample() fits the sampler before sampling, so a separate fit()
    # call beforehand would only repeat the same work.
    X_subsets, y_subsets = ee.fit_sample(X_train, y_train)
    X_resampled = pd.DataFrame(X_subsets[1], columns=feature_names)
    y_resampled = pd.DataFrame(y_subsets[1], columns=['Target'])
    return X_resampled, y_resampled
Пример #2
0
def test_sample_wrong_X():
    """Sampling must raise when given data other than the data used to fit."""

    sampler = EasyEnsemble(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Build inputs that are not the (X, Y) pair passed to fit() above.
    other_X = np.random.random((100, 40))
    other_y = np.array([0] * 50 + [1] * 50)

    assert_raises(RuntimeError, sampler.sample, other_X, other_y)
def test_sample_wrong_X():
    """Check that a RuntimeError is raised when the data passed to
    ``sample`` differs from the data passed to ``fit``."""

    # Fit on the reference data first.
    easy_ensemble = EasyEnsemble(random_state=RND_SEED)
    easy_ensemble.fit(X, Y)

    # Then try to sample from freshly generated, unrelated data.
    mismatched_X = np.random.random((100, 40))
    mismatched_y = np.array([0] * 50 + [1] * 50)
    assert_raises(
        RuntimeError, easy_ensemble.sample, mismatched_X, mismatched_y
    )
def test_ee_fit():
    """Fitting should populate the sampler's class statistics."""

    sampler = EasyEnsemble(ratio='auto', random_state=RND_SEED)
    sampler.fit(X, Y)

    # min_c_ / maj_c_ presumably hold the minority / majority class labels.
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)

    # stats_c_ is indexed by class label; values look like per-class counts.
    for label, expected in ((0, 500), (1, 4500)):
        assert_equal(sampler.stats_c_[label], expected)
Пример #5
0
def test_ee_fit():
    """Verify the attributes computed by ``fit``."""

    # Under-sampling ratio handed to the sampler.
    sampling_ratio = 'auto'

    easy_ensemble = EasyEnsemble(ratio=sampling_ratio, random_state=RND_SEED)
    easy_ensemble.fit(X, Y)

    # The fitted sampler must expose the expected values for the test data.
    assert_equal(easy_ensemble.min_c_, 0)
    assert_equal(easy_ensemble.maj_c_, 1)
    class_stats = easy_ensemble.stats_c_
    assert_equal(class_stats[0], 500)
    assert_equal(class_stats[1], 4500)
Пример #6
0
    def get_batch(self, tokenized_samples, labels):
        """Draw one EasyEnsemble subset and encode it as dense arrays.

        Args:
            tokenized_samples: Samples handed to the sampler. Each resampled
                row is expected to be an array (its ``shape[0]`` is read).
            labels: Label per sample. Each resampled label is used as a
                column index into the target matrix, so presumably an
                integer class id -- confirm against the caller.

        Returns:
            A ``(samples, targets)`` tuple of zero-initialised arrays with
            one row per resampled item: ``samples`` is
            ``self._max_document_length`` wide and holds each row truncated
            to that width (shorter rows leave trailing zeros); ``targets``
            is ``self._num_labels`` wide with a 1 in the row's label column.
        """
        sampler = EasyEnsemble(random_state=0, n_subsets=1)
        sampler.fit(tokenized_samples, labels)
        subsets_X, subsets_y = sampler.sample(tokenized_samples, labels)

        # Only one subset was requested, so take the first entry of each.
        batch_X, batch_y = subsets_X[0], subsets_y[0]
        n_rows = len(batch_X)

        targets = np.zeros(shape=(n_rows, self._num_labels))
        max_len = self._max_document_length
        samples = np.zeros(shape=(n_rows, max_len))

        for row, tokens in enumerate(batch_X):
            targets[row, batch_y[row]] = 1
            # The destination slice is clamped to the row width by numpy, so
            # it matches the truncated source for both short and long rows.
            samples[row, :tokens.shape[0]] = tokens[:max_len]
        return samples, targets