def test_polynomial_features_slow(self):
    """Check ``ExtendedFeatures(kind='poly-slow')`` against ``PolynomialFeatures``.

    Two cases are exercised: a single column expanded to degree 3 and a
    two-column matrix expanded to degree 2.  For each one the scikit-learn
    output is first compared with a hand-built expansion, then the feature
    names, the shape and the values produced by ``ExtendedFeatures`` are
    compared with the scikit-learn ones.
    """
    # NOTE(review): ``PolynomialFeatures.get_feature_names`` was deprecated in
    # scikit-learn 1.0 and removed in 1.2 -- confirm the pinned version.

    # Case 1: one column, powers 0..3.
    column = numpy.arange(6)[:, numpy.newaxis]
    expected_1d = numpy.hstack(
        [numpy.ones_like(column), column, column**2, column**3])

    # Case 2: two columns, every monomial of total degree <= 2.
    grid = numpy.arange(6).reshape((3, 2))
    a = grid[:, :1]
    b = grid[:, 1:]
    expected_2d = numpy.hstack([
        a**0 * b**0,
        a**1 * b**0,
        a**0 * b**1,
        a**2 * b**0,
        a**1 * b**1,
        a**0 * b**2,
    ])

    cases = [(3, column, expected_1d), (2, grid, expected_2d)]
    for degree, data, expected in cases:
        sk_poly = PolynomialFeatures(degree, include_bias=True)
        sk_out = sk_poly.fit_transform(data)
        self.assertEqual(sk_out, expected)
        sk_names = sk_poly.get_feature_names()

        extended = ExtendedFeatures(kind='poly-slow', poly_degree=degree)
        ext_out = extended.fit_transform(data)
        ext_names = extended.get_feature_names()

        self.assertEqual(len(sk_names), len(ext_names))
        self.assertEqual(sk_names, ext_names)
        self.assertEqual(sk_out, expected)
        self.assertEqual(sk_out.shape, ext_out.shape)
        self.assertEqual(sk_out, ext_out)
def fcts_model(X, y):
    """Fit four SGD-based models on ``(X, y)`` and return their update callables.

    The four models are:

    1. a bare ``SGDClassifier`` trained on ``PolynomialFeatures().fit_transform(X)``,
    2. a pipeline ``PolynomialFeatures`` -> ``SGDClassifier``,
    3. a pipeline ``ExtendedFeatures(kind='poly')`` -> ``SGDClassifier``,
    4. a pipeline ``ExtendedFeatures(kind='poly-slow')`` -> ``SGDClassifier``.

    :param X: training features
    :param y: training targets
    :return: tuple of four functions ``f(X, y, model=...)``; the first calls
        ``partial_fit`` directly on the classifier, the other three transform
        ``X`` with the first pipeline step and call ``partial_fit`` on the
        second step.
    """
    sgd_only = SGDClassifier()
    sk_pipeline = make_pipeline(PolynomialFeatures(), SGDClassifier())
    ext_pipeline = make_pipeline(
        ExtendedFeatures(kind='poly'), SGDClassifier())
    ext_slow_pipeline = make_pipeline(
        ExtendedFeatures(kind='poly-slow'), SGDClassifier())

    # The bare classifier receives already expanded features.
    expanded = PolynomialFeatures().fit_transform(X)
    sgd_only.fit(expanded, y)
    sk_pipeline.fit(X, y)
    ext_pipeline.fit(X, y)
    ext_slow_pipeline.fit(X, y)

    def partial_fit_model1(X, y, model=sgd_only):
        return model.partial_fit(X, y)

    def partial_fit_model2(X, y, model=sk_pipeline):
        # steps is a list of (name, estimator) pairs.
        (_, transformer), (_, classifier) = model.steps
        return classifier.partial_fit(transformer.transform(X), y)

    def partial_fit_model3(X, y, model=ext_pipeline):
        (_, transformer), (_, classifier) = model.steps
        return classifier.partial_fit(transformer.transform(X), y)

    def partial_fit_model4(X, y, model=ext_slow_pipeline):
        (_, transformer), (_, classifier) = model.steps
        return classifier.partial_fit(transformer.transform(X), y)

    return (
        partial_fit_model1,
        partial_fit_model2,
        partial_fit_model3,
        partial_fit_model4,
    )
def test_polynomial_features_bigger_ionly(self):
    """Compare interaction-only expansions of a 5x6 matrix for degrees 1 to 5.

    For every degree, ``ExtendedFeatures`` (bias included, interactions only)
    must return the same feature names -- with explicit input names and with
    the default ones -- the same shape and the same values as
    ``PolynomialFeatures`` configured identically.
    """
    # NOTE(review): ``PolynomialFeatures.get_feature_names`` was deprecated in
    # scikit-learn 1.0 and removed in 1.2 -- confirm the pinned version.
    data = numpy.arange(30).reshape((5, 6))

    for degree in range(1, 6):
        sk_poly = PolynomialFeatures(
            degree, include_bias=True, interaction_only=True)
        sk_out = sk_poly.fit_transform(data)
        sk_names = sk_poly.get_feature_names()

        extended = ExtendedFeatures(
            poly_degree=degree,
            poly_include_bias=True,
            poly_interaction_only=True)
        ext_out = extended.fit_transform(data)

        # Names obtained from explicitly supplied input names x0..x5.
        input_names = ["x%d" % col for col in range(data.shape[1])]
        ext_names = extended.get_feature_names(input_names)
        self.assertEqual(len(sk_names), len(ext_names))
        self.assertEqual(sk_names, ext_names)

        # Names obtained from the default input names.
        ext_names = extended.get_feature_names()
        self.assertEqual(len(sk_names), len(ext_names))
        self.assertEqual(sk_names, ext_names)

        self.assertEqual(sk_out.shape, ext_out.shape)
        self.assertEqual(sk_out, ext_out)
def __init__(self, dim=None, **opts):
    """Build and fit the four benchmarked models.

    :param dim: second argument passed to ``random_binary_classification``
        when generating the training set; mandatory despite the ``None``
        default
    :param opts: forwarded unchanged to ``BenchPerfTest.__init__``
    """
    # Models are fitted here: everything that must not be measured
    # should take place in the constructor.
    assert dim is not None
    BenchPerfTest.__init__(self, **opts)

    self.model1 = SGDClassifier()
    self.model2 = make_pipeline(PolynomialFeatures(), SGDClassifier())
    self.model3 = make_pipeline(
        ExtendedFeatures(kind='poly'), SGDClassifier())
    self.model4 = make_pipeline(
        ExtendedFeatures(kind='poly-slow'), SGDClassifier())

    X, y = random_binary_classification(10000, dim)

    # model1 is a bare classifier, so it is trained on expanded features.
    expanded = PolynomialFeatures().fit_transform(X)
    self.model1.fit(expanded, y)
    # The pipelines expand the raw features themselves.
    for pipeline in (self.model2, self.model3, self.model4):
        pipeline.fit(X, y)
def test_polynomial_features_nobias_ionly_slow(self):
    """Check ``poly-slow`` without bias and with interactions only.

    The same two inputs as the plain polynomial test are used (one column at
    degree 3, two columns at degree 2).  The scikit-learn output is compared
    with the relevant columns of the full hand-built expansion, then the
    names, shape and values returned by ``ExtendedFeatures`` are compared
    with the scikit-learn ones.
    """
    # NOTE(review): ``PolynomialFeatures.get_feature_names`` was deprecated in
    # scikit-learn 1.0 and removed in 1.2 -- confirm the pinned version.

    # Full expansion of one column: [1, x, x^2, x^3].
    column = numpy.arange(6)[:, numpy.newaxis]
    full_1d = numpy.hstack(
        [numpy.ones_like(column), column, column**2, column**3])

    # Full expansion of two columns: [1, a, b, a^2, a*b, b^2].
    grid = numpy.arange(6).reshape((3, 2))
    a = grid[:, :1]
    b = grid[:, 1:]
    full_2d = numpy.hstack([
        a**0 * b**0,
        a**1 * b**0,
        a**0 * b**1,
        a**2 * b**0,
        a**1 * b**1,
        a**0 * b**2,
    ])

    cases = [(3, column, full_1d), (2, grid, full_2d)]
    for degree, data, full in cases:
        # Without the bias and without pure powers only x remains in the
        # first case, and a, b, a*b in the second one.
        if degree == 3:
            kept = [1]
        else:
            kept = [1, 2, 4]
        expected = full[:, kept]

        sk_poly = PolynomialFeatures(
            degree, include_bias=False, interaction_only=True)
        sk_out = sk_poly.fit_transform(data)
        sk_names = sk_poly.get_feature_names()
        self.assertEqual(sk_out, expected)

        extended = ExtendedFeatures(
            kind="poly-slow",
            poly_degree=degree,
            poly_include_bias=False,
            poly_interaction_only=True)
        ext_out = extended.fit_transform(data)
        ext_names = extended.get_feature_names()

        self.assertEqual(len(sk_names), len(ext_names))
        self.assertEqual(sk_names, ext_names)
        self.assertEqual(sk_out, expected)
        self.assertEqual(sk_out.shape, ext_out.shape)
        self.assertEqual(sk_out, ext_out)
def test_polynomial_features_sparse(self):
    """Check that sparse and dense inputs give the same degree-2 expansion.

    A random 100x2 matrix of zeros and ones is expanded both as a CSR matrix
    and as a dense array, first with ``PolynomialFeatures`` and then with
    ``ExtendedFeatures``.  In both cases the sparse result must have the
    same dtype as the dense result and the same values once densified.
    The scikit-learn output may be CSC or CSR, whereas the
    ``ExtendedFeatures`` output is required to be CSC.
    """
    dtype = numpy.float64
    rng = numpy.random.RandomState(0)  # pylint: disable=E1101
    X = rng.randint(0, 2, (100, 2))
    X_sparse = sparse.csr_matrix(X)

    # Reference behaviour: scikit-learn.
    est = PolynomialFeatures(2)
    Xt_sparse = est.fit_transform(X_sparse.astype(dtype))
    Xt_dense = est.fit_transform(X.astype(dtype))
    self.assertIsInstance(
        Xt_sparse, (sparse.csc_matrix, sparse.csr_matrix))
    self.assertEqual(Xt_sparse.dtype, Xt_dense.dtype)
    # ``toarray()`` is used instead of the ``.A`` shorthand: it works for
    # both sparse matrices and sparse arrays.
    self.assertEqual(Xt_sparse.toarray(), Xt_dense)

    # Same checks for ExtendedFeatures.
    est = ExtendedFeatures(poly_degree=2)
    Xt_sparse = est.fit_transform(X_sparse.astype(dtype))
    Xt_dense = est.fit_transform(X.astype(dtype))
    self.assertIsInstance(Xt_sparse, sparse.csc_matrix)
    self.assertEqual(Xt_sparse.dtype, Xt_dense.dtype)
    self.assertEqual(Xt_sparse.toarray(), Xt_dense)
def polynomial_features_csr_X_zero_row(self, zero_row_index, deg,
                                       interaction_only):
    """Compare both transformers on a matrix that contains an all-zero row.

    :param zero_row_index: index of the row that is set to zero
    :param deg: polynomial degree given to both transformers
    :param interaction_only: forwarded to both transformers

    The bias column is disabled for both transformers.  Feature names and
    transformed values of ``ExtendedFeatures`` must match the ones of
    ``PolynomialFeatures``.
    """
    # NOTE(review): ``PolynomialFeatures.get_feature_names`` was deprecated in
    # scikit-learn 1.0 and removed in 1.2 -- confirm the pinned version.

    # sparse_random is presumably scipy.sparse.random (3 rows, 10 columns,
    # density 1.0) -- confirm against the file's imports.
    csr = sparse_random(3, 10, 1.0, random_state=0).tocsr()
    csr[zero_row_index, :] = 0.0
    # Both transformers are fed the dense version of the matrix.
    dense = csr.toarray()

    extended = ExtendedFeatures(
        poly_degree=deg,
        poly_include_bias=False,
        poly_interaction_only=interaction_only)
    extended.fit(dense)

    sk_poly = PolynomialFeatures(
        degree=deg,
        include_bias=False,
        interaction_only=interaction_only)
    sk_poly.fit(dense)

    self.assertEqual(
        sk_poly.get_feature_names(), extended.get_feature_names())

    ext_out = extended.fit_transform(dense)
    sk_out = sk_poly.fit_transform(dense)
    self.assertEqual(ext_out, sk_out)