Python SKL_Pipeline示例

编程语言: Python

命名空间/包名称: sklearn.pipeline

类/类型: SKL_Pipeline

hotexamples.com的示例: 3

Python SKL_Pipeline - 共找到3个示例。以下是从开源项目中提取的、评价最高的 sklearn.pipeline.SKL_Pipeline 真实 Python 代码示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

SKL_Pipeline(3)

常用方法

SKL_Pipeline (3)

示例#1

显示文件

    def test_cv_lasso_with_mllib_featurization(self):
        """Grid-search a Lasso pipeline over features produced by MLlib stages.

        The (review, rating) rows are turned into a DataFrame through
        ``self.sql``, featurized with a Tokenizer -> HashingTF pipeline,
        converted to pandas through ``self.converter`` and then fed to
        ``GridSearchCV``.  The test passes when the search reports exactly
        one parameter setting per candidate ``alpha``.
        """
        reviews = [
            ('hi there', 0.0),
            ('what is up', 1.0),
            ('huh', 1.0),
            ('now is the time', 5.0),
            ('for what', 0.0),
            ('the spark was there', 5.0),
            ('and so', 3.0),
            ('were many socks', 0.0),
            ('really', 1.0),
            ('too cool', 2.0),
        ]
        raw_df = self.sql.createDataFrame(reviews, ["review", "rating"])

        # Feature extraction using MLlib: split each review into words, then
        # hash the words into a 20000-wide "features" column.
        featurizer = Pipeline(stages=[
            Tokenizer(inputCol="review", outputCol="words"),
            HashingTF(inputCol="words", outputCol="features", numFeatures=20000),
        ])
        featurized = featurizer.fit(raw_df).transform(raw_df)

        # Expose the hashed features under the name "review" so the pandas
        # frame can be read as df.review / df.rating below.
        selected = featurized.select(featurized.features.alias("review"), "rating")
        pandas_df = self.converter.toPandas(selected)

        estimator = SKL_Pipeline([('lasso', SKL_Lasso())])
        alphas = (0.001, 0.005, 0.01)
        param_grid = {'lasso__alpha': alphas}

        search = GridSearchCV(self.sc, estimator, param_grid)
        fitted = search.fit(pandas_df.review.values, pandas_df.rating.values)

        # One entry in cv_results_['params'] is expected per alpha candidate.
        n_candidates = len(fitted.cv_results_['params'])
        assert n_candidates == len(alphas)

示例#2

显示文件

文件： test_grid_search_2.py 项目： phi-dbq/spark-sklearn

 def test_cv_pipeline(self):
     """Grid-search a text pipeline (hashing -> tf-idf -> Lasso) on raw reviews.

     The (review, rating) rows are turned into a DataFrame through
     ``self.sql`` and converted to pandas before fitting.  The test passes
     when the search reports exactly one parameter setting per candidate
     ``alpha``.
     """
     pipeline = SKL_Pipeline([
         ('vect', SKL_HashingVectorizer(n_features=20)),
         ('tfidf', SKL_TfidfTransformer(use_idf=False)),
         ('lasso', SKL_Lasso(max_iter=1))
     ])
     parameters = {
         'lasso__alpha': (0.001, 0.005, 0.01)
     }
     grid_search = GridSearchCV(self.sc, pipeline, parameters)
     data = [('hi there', 0.0),
             ('what is up', 1.0),
             ('huh', 1.0),
             ('now is the time', 5.0),
             ('for what', 0.0),
             ('the spark was there', 5.0),
             ('and so', 3.0),
             ('were many socks', 0.0),
             ('really', 1.0),
             ('too cool', 2.0)]
     df = self.sql.createDataFrame(data, ["review", "rating"]).toPandas()
     skl_gs = grid_search.fit(df.review.values, df.rating.values)
     # Read the candidates from cv_results_['params'] rather than the legacy
     # grid_scores_ attribute, which scikit-learn removed in 0.20.
     # NOTE(review): assumes this GridSearchCV exposes cv_results_ like
     # scikit-learn's does - confirm against the wrapper version in use.
     assert len(skl_gs.cv_results_['params']) == len(
         parameters['lasso__alpha'])
     # TODO: add per-candidate assertions on the scores.

示例#3

显示文件

 def test_cv_linreg(self):
     """Grid-search a single-step Lasso pipeline on a stacked sparse matrix.

     ``X`` is built by vertically stacking 100 blocks, one per integer
     ``x`` in ``range(100)``, each produced by ``self.list2csr([x, x + 1.0])``.
     ``y`` is the column vector 0..99.  The test passes when the search
     reports exactly one parameter setting per candidate ``alpha``.
     """
     pipeline = SKL_Pipeline([('lasso', SKL_Lasso(max_iter=1))])
     parameters = {'lasso__alpha': (0.001, 0.005, 0.01)}
     grid_search = GridSearchCV(self.sc, pipeline, parameters)
     # Build a real list of blocks: scipy.sparse.vstack is documented to take
     # a sequence, and a lazy map() iterator (Python 3) is not one.
     # NOTE(review): list2csr is defined outside this view; presumably it
     # returns a one-row CSR matrix - confirm.
     X = scipy.sparse.vstack(
         [self.list2csr([x, x + 1.0]) for x in range(100)])
     y = np.arange(100).reshape((100, 1))
     skl_gs = grid_search.fit(X, y)
     assert len(skl_gs.cv_results_['params']) == len(
         parameters['lasso__alpha'])