def test_get_schema_returns_correct_value_for_vector_valued_columns(self):
        """Check the output schema of a pipeline that one-hot encodes 'c0'.

        A pipeline holding a single OneHotVectorizer bound to column 'c0' is
        fitted on ``train_df`` (defined outside this method). The schema
        returned by ``get_output_columns()`` must contain 'c0.a', 'c0.b',
        'c1' and 'c2', and nothing else.
        """
        pipeline = Pipeline([OneHotVectorizer() << 'c0'])
        pipeline.fit(train_df)

        schema = pipeline.get_output_columns()

        # assertIn names the missing column and shows the actual schema on
        # failure, whereas assertTrue(x in y) only reports "False is not true".
        for expected_column in ('c0.a', 'c0.b', 'c1', 'c2'):
            self.assertIn(expected_column, schema)

        # Exactly the four names checked above, no extras.
        self.assertEqual(len(schema), 4)
    def test_get_schema_does_not_work_when_predictor_is_part_of_model(self):
        """Check that get_output_columns() raises when a predictor is present.

        The pipeline consists only of an OnlineGradientDescentRegressor with
        label 'c2', fitted on ``train_df`` (defined outside this method) with
        column 'c0' dropped. The test passes only if ``get_output_columns()``
        raises some exception.
        """
        df = train_df.drop(['c0'], axis=1)

        pipeline = Pipeline([OnlineGradientDescentRegressor(label='c2')])
        pipeline.fit(df)

        # Any Exception subclass is accepted here; the concrete type raised
        # by the library is not pinned by this test.
        with self.assertRaises(Exception):
            pipeline.get_output_columns()
    def test_get_schema_returns_correct_value_for_single_valued_columns(self):
        """Check the output schema of a pipeline that only filters rows.

        A pipeline holding a single RangeFilter (min=0.0, max=4.5) bound to
        column 'c2' is fitted on ``train_df`` (defined outside this method)
        with column 'c0' dropped. The schema returned by
        ``get_output_columns()`` must contain exactly 'c1' and 'c2'.
        """
        df = train_df.drop(['c0'], axis=1)

        pipeline = Pipeline([RangeFilter(min=0.0, max=4.5) << 'c2'])
        pipeline.fit(df)
        # The transformed frame is not inspected; the call only exercises
        # transform() on the fitted pipeline before the schema is queried.
        pipeline.transform(df)

        schema = pipeline.get_output_columns()

        # assertIn names the missing column and shows the actual schema on
        # failure, whereas assertTrue(x in y) only reports "False is not true".
        for expected_column in ('c1', 'c2'):
            self.assertIn(expected_column, schema)

        # Exactly the two names checked above, no extras.
        self.assertEqual(len(schema), 2)
# Example #4
# 0
# File: Schema.py  Project: yazici/NimbusML
# Get schema from a fitted pipeline example.
import numpy as np
import pandas as pd
from nimbusml import Pipeline, FileDataStream
from nimbusml.datasets import get_dataset
from nimbusml.feature_extraction.text import NGramFeaturizer
from nimbusml.feature_extraction.text.extractor import Ngram

# Resolve the on-disk location of the "wiki_detox_train" dataset and open it
# as a tab-separated FileDataStream.
path = get_dataset("wiki_detox_train").as_filepath()
data = FileDataStream.read_csv(path, sep='\t')

# Preview the first rows of the stream.
print(data.head())
#    Sentiment                                      SentimentText
# 0          1  ==RUDE== Dude, you are rude upload that carl p...
# 1          1  == OK! ==  IM GOING TO VANDALIZE WILD ONES WIK...
# 2          1  Stop trolling, zapatancas, calling me a liar m...
# 3          1  ==You're cool==  You seem like a really cool g...
# 4          1  ::::: Why are you threatening me? I'm not bein...

# One-step pipeline: featurize 'SentimentText' into a 'features' column
# using the Ngram word extractor.
featurizer = NGramFeaturizer(
    word_feature_extractor=Ngram(),
    columns={'features': ['SentimentText']},
)
pipe = Pipeline([featurizer])

# Fit the pipeline, then ask it for the names of the columns it produces.
pipe.fit(data)
schema = pipe.get_output_columns()

# Show only the first five column names.
print(schema[:5])
# ['Sentiment', 'SentimentText', 'features.Char.<␂>|=|=', 'features.Char.=|=|r', 'features.Char.=|r|u']