示例#1
0
def show_histo(df, bins=20):
    """Draw one histogram per column returned by ``numeric_cols(df)``.

    Each histogram is titled with its column name and displayed with
    ``plt.show()`` before the next column is plotted.

    Args:
        df: the ``pd.DataFrame`` whose columns are plotted.
        bins: forwarded as the ``bins`` argument of ``Series.hist``.
    """
    assert isinstance(df, pd.DataFrame)

    for column in numeric_cols(df):
        series = df[column]
        series.hist(bins=bins)
        plt.title(column)
        plt.show()
示例#2
0
def get_pipeline(est, is_tree, is_regressor, params):
    """Assemble the preprocessing pipeline for ``est`` and fill in its parameter grid.

    One of three layouts is built, checked in this order:

    * ``model_name(est)`` starts with ``'Dummy'``: a ``FunctionTransformer``
      keeping only ``numeric_cols`` followed by the model.
    * ``is_tree`` is truthy: ``DateEncoder`` -> ``OrdinalEncoder`` ->
      ``FunctionTransformer`` (``fillna(-999)``) -> ``SelectKBest2`` -> model.
    * otherwise: ``DateEncoder`` -> union of a numeric branch
      (``Imputer`` + wrapped ``StandardScaler``) and a categorical branch
      (``SparseCatEncoder``) -> union of ``SelectKBest2`` and
      ``TruncatedSVD2`` -> model.

    Args:
        est: estimator used as the final ``'mo'`` step.
        is_tree: selects the tree layout; also forwarded to ``get_selector``.
        is_regressor: forwarded to ``get_selector``.
        params: dict mapping ``step__param`` names to candidate values. It is
            updated IN PLACE with the entries for the steps created here.
            (Presumably consumed by a parameter search -- confirm at the caller.)

    Returns:
        tuple: ``(name, ppl, params)`` -- the model name, the constructed
        ``Pipeline`` and the same ``params`` object that was passed in.
    """
    name = model_name(est)

    # Candidate values for the ``k`` parameter of SelectKBest2 / TruncatedSVD2.
    # Kept in one place so every branch searches the same, duplicate-free grid
    # (the tree branch used to list 1000 twice).
    k_candidates = [0.2, 0.5, 0.8, 1000]

    if name.startswith('Dummy'):
        ppl = Pipeline([
            ('ft', FunctionTransformer()),
            ('mo', est),
        ])
        params['ft__func'] = [lambda x: x[numeric_cols(x)]]
        params['ft__validate'] = [False]
    elif is_tree:
        ppl = Pipeline([
            ('da', DateEncoder()),
            ('du', OrdinalEncoder()),
            ('ft', FunctionTransformer()),
            ('se', SelectKBest2()),
            ('mo', est),
        ])
        params['da__ascategory'] = [False]
        params['du__drop_invariant'] = [True]
        # Replace missing values with -999 before feature selection.
        params['ft__func'] = [lambda x: x.fillna(-999)]
        params['ft__validate'] = [False]
        params['se__score_func'] = get_selector(is_regressor, is_tree)
        params['se__k'] = list(k_candidates)
    else:
        numeric_branch = Pipeline([
            ('ft', FunctionTransformer()),
            ('in', Imputer()),
            ('sc', TransformerWrap(StandardScaler())),
        ])
        categorical_branch = Pipeline([
            ('ft', FunctionTransformer()),
            ('sc', SparseCatEncoder()),
        ])
        ppl = Pipeline([
            ('da', DateEncoder()),
            ('en', FeatureUnion([
                ('nu', numeric_branch),
                ('ca', categorical_branch),
            ])),
            ('fu', FeatureUnion([
                ('se', SelectKBest2()),
                ('dr', TruncatedSVD2()),
            ])),
            ('mo', est),
        ])

        # Each branch's leading FunctionTransformer picks its own column subset.
        params['en__nu__ft__func'] = [lambda x: x[numeric_cols(x)]]
        params['en__nu__ft__validate'] = [False]
        params['en__ca__ft__func'] = [lambda x: x[object_cols(x)]]
        params['en__ca__ft__validate'] = [False]
        params['fu__se__score_func'] = get_selector(is_regressor, is_tree)
        # Separate list objects so later edits to one grid do not affect the other.
        params['fu__se__k'] = list(k_candidates)
        params['fu__dr__k'] = list(k_candidates)

    return name, ppl, params
示例#3
0
 def transform(self, X, y=None):
     """Return ``X`` indexed by the columns from ``numeric_cols(X)``; ``y`` is unused."""
     selected = numeric_cols(X)
     return X[selected]
示例#4
0
# x and y are expected to exist already (e.g. from: x, y = get_iris()).

print_summary(x)

# Numeric branch: FunctionTransformer followed by a wrapped StandardScaler.
numeric_branch = Pipeline([
    ('ft', FunctionTransformer()),
    ('sc', TransformerWrap(StandardScaler())),
])

# Categorical branch. make_pipeline auto-names the steps, which is why the
# parameter keys below use 'functiontransformer-1' / 'functiontransformer-2'.
categorical_branch = make_pipeline(
    FunctionTransformer(),
    SparseCatEncoder(),
    FunctionTransformer(),
)

encoder_union = FeatureUnion([
    ('nu', numeric_branch),
    ('ca', categorical_branch),
])

feature_union = make_union(SelectKBest2(), TruncatedSVD2())

ppl = Pipeline([
    ('in', ConstantInputer()),
    ('da', DateEncoder()),
    ('en', encoder_union),
    ('fi', feature_union),
])

params = {
    # numeric branch keeps the columns reported by numeric_cols
    'en__nu__ft__func': lambda x: x[numeric_cols(x)],
    'en__nu__ft__validate': False,
    # categorical branch: first keep the columns reported by object_cols ...
    'en__ca__functiontransformer-1__func': lambda x: x[object_cols(x)],
    'en__ca__functiontransformer-1__validate': False,
    # ... then, after encoding, keep only columns with more than one distinct value
    'en__ca__functiontransformer-2__func': lambda x: x.loc[:, x.nunique() > 1],
    'en__ca__functiontransformer-2__validate': False,
}

ppl.set_params(**params)

xt = ppl.fit_transform(x, y)

print_summary(xt)