Exemplo n.º 1
0
def test_binarizer():
    """Check Binarizer output counts, sparsity and copy semantics.

    The same 2x3 fixture is fed as a NumPy array, a list and CSR/CSC
    sparse matrices.  For each container the test verifies the number of
    zeros/ones produced for several thresholds, that sparse input yields
    sparse output, and that ``copy=True`` / ``copy=False`` respectively
    return a new object / the input object itself.
    """
    X_ = np.array([[1, 0, 5], [2, 3, -1]])

    for init in (np.array, list, sparse.csr_matrix, sparse.csc_matrix):
        X = init(X_.copy())

        # threshold=2.0: the expected counts imply that only 5 and 3 map
        # to 1, i.e. a value equal to the threshold is binarized to 0.
        binarizer = Binarizer(threshold=2.0, copy=True)
        X_bin = binarizer.transform(X)
        # Output must be sparse exactly when the input is sparse.
        assert_equal(sparse.issparse(X), sparse.issparse(X_bin))
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 4)
        assert_equal(np.sum(X_bin == 1), 2)

        # Default threshold after an explicit fit().  The identity check is
        # done on the raw transform result: checking it after toarray()
        # would compare a freshly densified array and could never fail.
        binarizer = Binarizer(copy=True).fit(X)
        X_bin = binarizer.transform(X)
        assert_true(X_bin is not X)
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)

        # Same expectations without calling fit() first.
        binarizer = Binarizer(copy=True)
        X_bin = binarizer.transform(X)
        assert_true(X_bin is not X)
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)

        # copy=False must hand back the input object itself; a list is
        # excluded because it has to be converted to an array first.
        binarizer = Binarizer(copy=False)
        X_bin = binarizer.transform(X)
        if init is not list:
            assert_true(X_bin is X)
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)

    # Negative threshold on dense input: only -1 is expected to map to 0.
    binarizer = Binarizer(threshold=-0.5, copy=True)
    for init in (np.array, list):
        X = init(X_.copy())

        X_bin = toarray(binarizer.transform(X))
        assert_equal(np.sum(X_bin == 0), 1)
        assert_equal(np.sum(X_bin == 1), 5)

    # Cannot use threshold < 0 for sparse.  Build the matrix from the
    # pristine fixture instead of relying on the leaked loop variable.
    assert_raises(ValueError, binarizer.transform, sparse.csc_matrix(X_))
Exemplo n.º 2
0
def test_binarizer():
    """Check Binarizer output counts, sparsity and copy semantics.

    The same 2x3 fixture is fed as a NumPy array, a list and CSR/CSC
    sparse matrices.  For each container the test verifies the number of
    zeros/ones produced for several thresholds, that sparse input yields
    sparse output, and that ``copy=True`` / ``copy=False`` respectively
    return a new object / the input object itself.
    """
    X_ = np.array([[1, 0, 5], [2, 3, -1]])

    for init in (np.array, list, sparse.csr_matrix, sparse.csc_matrix):
        X = init(X_.copy())

        # threshold=2.0: the expected counts imply that only 5 and 3 map
        # to 1, i.e. a value equal to the threshold is binarized to 0.
        binarizer = Binarizer(threshold=2.0, copy=True)
        X_bin = binarizer.transform(X)
        # Output must be sparse exactly when the input is sparse.
        assert_equal(sparse.issparse(X), sparse.issparse(X_bin))
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 4)
        assert_equal(np.sum(X_bin == 1), 2)

        # Default threshold after an explicit fit().  The identity check is
        # done on the raw transform result: checking it after toarray()
        # would compare a freshly densified array and could never fail.
        binarizer = Binarizer(copy=True).fit(X)
        X_bin = binarizer.transform(X)
        assert_true(X_bin is not X)
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)

        # Same expectations without calling fit() first.
        binarizer = Binarizer(copy=True)
        X_bin = binarizer.transform(X)
        assert_true(X_bin is not X)
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)

        # copy=False must hand back the input object itself; a list is
        # excluded because it has to be converted to an array first.
        binarizer = Binarizer(copy=False)
        X_bin = binarizer.transform(X)
        if init is not list:
            assert_true(X_bin is X)
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)

    # Negative threshold on dense input: only -1 is expected to map to 0.
    binarizer = Binarizer(threshold=-0.5, copy=True)
    for init in (np.array, list):
        X = init(X_.copy())

        X_bin = toarray(binarizer.transform(X))
        assert_equal(np.sum(X_bin == 0), 1)
        assert_equal(np.sum(X_bin == 1), 5)

    # Cannot use threshold < 0 for sparse.  Build the matrix from the
    # pristine fixture instead of relying on the leaked loop variable.
    assert_raises(ValueError, binarizer.transform, sparse.csc_matrix(X_))
Exemplo n.º 3
0
class CreateBinarizer(CreateModel):
    """Model wrapper that fits and applies a ``Binarizer``.

    ``fit`` and ``predict`` both return ``Timer().interval`` for the timed
    section (presumably the elapsed time -- confirm against ``Timer``).
    """

    def fit(self, data, args):
        """Create a fresh ``Binarizer`` and fit it on the training split.

        ``args`` is accepted for interface compatibility and is not used.
        Returns the timer interval of the ``fit`` call.
        """
        binarizer = Binarizer()
        self.model = binarizer

        with Timer() as stopwatch:
            binarizer.fit(data.X_train, data.y_train)

        return stopwatch.interval

    def test(self, data):
        """Return the fitted model's transform of ``data.X_test``."""
        model = self.model
        assert model is not None

        return model.transform(data.X_test)

    def predict(self, data):
        """Store the transformed test split in ``self.predictions``.

        Also tags ``data.learning_task`` as ``LearningTask.REGRESSION`` and
        returns the timer interval of the transform.
        """
        with Timer() as stopwatch:
            self.predictions = self.test(data)

        # NOTE(review): the task is marked as regression even though the
        # output is binarized -- confirm this is what downstream expects.
        data.learning_task = LearningTask.REGRESSION
        return stopwatch.interval
Exemplo n.º 4
0
    data.loc[data['Region'] == i, 'expensive than average region'] = data.loc[data['Region'] == i, 'Price'] - \
                                                             data.loc[data['Region'] == i, 'Price'].mean()
# For each grouping column, store how far every row's price lies from the
# mean price of the rows that share the same group value.  Only the listed
# values are visited, so rows whose group value falls outside the listed
# range are never assigned by these loops.
for group_column, group_values, deviation_column in (
    ('Weekday', range(1, 8), 'expensive than average weekday'),
    ('Date', range(1, 366), 'expensive than average date'),
    ('Apartment', range(2), 'expensive than average apartment'),
    ('Beds', range(1, 5), 'expensive than average bed'),
):
    for i in group_values:
        in_group = data[group_column] == i
        group_prices = data.loc[in_group, 'Price']
        data.loc[in_group, deviation_column] = group_prices - group_prices.mean()
# Binary indicator features: one column per source, thresholded at 3.0 for
# 'Review' and at 80 for 'Price'.  Binarizer expects 2-D input, hence the
# reshape of each column to shape (n_rows, 1).
threshold1 = Binarizer(threshold=3.0)
review_column = data['Review'].values.reshape(-1, 1)
res1 = pd.DataFrame(threshold1.transform(review_column))

threshold2 = Binarizer(threshold=80)
price_column = data['Price'].values.reshape(-1, 1)
res2 = pd.DataFrame(threshold2.transform(price_column))

# Degree-2, interaction-only polynomial expansion without a bias column.
pf = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
interaction_inputs = data[['Apartment', 'Beds', 'Review', 'Pic Quality', 'Price']]
res3 = pd.DataFrame(pf.fit_transform(interaction_inputs))

# One-hot encode 'Region' and 'Weekday'.  The same encoder object is refit
# for the second column, so afterwards it reflects 'Weekday' only.
encoder = OneHotEncoder()
region_column = data['Region'].values.reshape(-1, 1)
data_region1hot = encoder.fit_transform(region_column)
data_region = pd.DataFrame(data_region1hot.toarray())

weekday_column = data['Weekday'].values.reshape(-1, 1)
data_weekday1hot = encoder.fit_transform(weekday_column)
data_weekday = pd.DataFrame(data_weekday1hot.toarray())
# NOTE(review): every frame built above carries default integer column
# labels, so combining them column-wise would produce duplicate labels --
# confirm that is acceptable for the consumer.
data_reformed = pd.concat(
    [data.drop(columns=['ID']), data_region, data_weekday, res1, res2, res3],