def svms(**kwargs):
    """Construct LinearSVC estimator steps over a small hyperparameter grid.

    The grid is the union of penalty='l2' (dual True or False) and
    penalty='l1' (dual False only — presumably because LinearSVC rejects
    the l1/dual=True combination; confirm against sklearn), each crossed
    with C in {.001, .01, .1, 1}.

    Args:
        kwargs: unused; accepted for interface symmetry with the other
            estimator factory functions in this module.

    Returns:
        list of Construct steps named 'estimator', one per combination.
    """
    grid = util.dict_product(dict(penalty=['l2'], dual=[True, False],
                                  C=[.001, .01, .1, 1])) + \
           util.dict_product(dict(penalty=['l1'], dual=[False],
                                  C=[.001, .01, .1, 1]))
    # Build directly via a comprehension instead of append-in-loop.
    return [Construct(name='estimator',
                      __class_name__='sklearn.svm.LinearSVC',
                      **estimator_args)
            for estimator_args in grid]
def svms(**kwargs):
    """Construct LinearSVC estimator steps over a small hyperparameter grid.

    Searches penalty='l2' with dual in {True, False} and penalty='l1' with
    dual=False, each crossed with C in {.001, .01, .1, 1}.

    Args:
        kwargs: unused; accepted for interface symmetry with the other
            estimator factory functions in this module.

    Returns:
        list of Call steps wrapping sklearn.svm.LinearSVC, one per
        hyperparameter combination.
    """
    grid = util.dict_product(dict(penalty=['l2'], dual=[True, False],
                                  C=[.001, .01, .1, 1])) + \
           util.dict_product(dict(penalty=['l1'], dual=[False],
                                  C=[.001, .01, .1, 1]))
    # Build directly via a comprehension instead of append-in-loop.
    return [Call('sklearn.svm.LinearSVC', **estimator_args)
            for estimator_args in grid]
def logits(**kwargs):
    """Construct LogisticRegression steps over a penalty/C grid.

    Args:
        kwargs: additional grid axes merged into the searched dict; each
            value should be a list of candidates for util.dict_product.

    Returns:
        list of Call steps wrapping sklearn.linear_model.LogisticRegression,
        one per hyperparameter combination.
    """
    grid = util.dict_product(dict(penalty=['l1', 'l2'],
                                  C=[.001, .01, .1, 1], **kwargs))
    # Build directly via a comprehension instead of append-in-loop.
    return [Call('sklearn.linear_model.LogisticRegression', **estimator_args)
            for estimator_args in grid]
def logits(**kwargs):
    """Construct LogisticRegression steps over a penalty/C grid.

    Args:
        kwargs: additional grid axes merged into the searched dict; each
            value should be a list of candidates for util.dict_product.

    Returns:
        list of Construct steps named 'estimator', one per hyperparameter
        combination.
    """
    grid = util.dict_product(dict(penalty=['l1', 'l2'],
                                  C=[.001, .01, .1, 1], **kwargs))
    # Build directly via a comprehension instead of append-in-loop.
    return [Construct(name='estimator',
                      __class_name__='sklearn.linear_model.LogisticRegression',
                      **estimator_args)
            for estimator_args in grid]
def forests(**kwargs):
    """Construct RandomForestClassifier steps over a criterion/max_features grid.

    n_jobs is fixed at -1 (all cores). kwargs contributes further grid
    axes; each value should be a list of candidates.

    Args:
        kwargs: additional grid axes merged into the searched dict.

    Returns:
        list of Construct steps named 'estimator', one per hyperparameter
        combination.
    """
    d = dict(criterion=['entropy', 'gini'],
             max_features=['sqrt', 'log2'],
             n_jobs=[-1], **kwargs)
    # Build directly via a comprehension instead of append-in-loop.
    return [Construct(name='estimator',
                      __class_name__='sklearn.ensemble.RandomForestClassifier',
                      **estimator_args)
            for estimator_args in util.dict_product(d)]
def forests(**kwargs):
    """Construct RandomForestClassifier steps over a criterion/max_features grid.

    n_jobs is fixed at -1 (all cores). kwargs contributes further grid
    axes; each value should be a list of candidates.

    Args:
        kwargs: additional grid axes merged into the searched dict.

    Returns:
        list of Call steps wrapping sklearn.ensemble.RandomForestClassifier,
        one per hyperparameter combination.
    """
    d = dict(criterion=['entropy', 'gini'],
             max_features=['sqrt', 'log2'],
             n_jobs=[-1], **kwargs)
    # Build directly via a comprehension instead of append-in-loop.
    return [Call('sklearn.ensemble.RandomForestClassifier', **estimator_args)
            for estimator_args in util.dict_product(d)]
def models(estimators, transform_search):
    """Build one FitPredict workflow per (transform_args, estimator) pair.

    Args:
        estimators: collection of estimator-constructing steps
        transform_search: dict of LeadTransform arguments to grid search
            via dict_product

    Returns:
        list of model.FitPredict target steps, each named 'y'.
    """
    workflows = []
    transform_grid = dict_product(transform_search)
    for args, est in product(transform_grid, estimators):
        # LeadTransform is pinned to month=1, day=25 for this search.
        lead_transform = lead.model.transform.LeadTransform(
            month=1, day=25, name='transform', **args)
        predict = model.FitPredict(
            inputs=[est, lead_transform], name='y', target=True)
        workflows.append(predict)
    return workflows
def models(estimators, transform_search):
    """Build one FitPredict workflow per (transform_args, estimator) pair.

    Args:
        estimators: collection of estimator-constructing steps
        transform_search: dict of LeadTransform arguments to grid search
            via dict_product

    Returns:
        list of model.FitPredict target steps, each named 'y'.
    """
    return [
        model.FitPredict(
            inputs=[
                estimator,
                # LeadTransform is pinned to month=1, day=25 for this search.
                lead.model.transform.LeadTransform(
                    month=1, day=25, name='transform', **transform_args),
            ],
            name='y', target=True)
        for transform_args, estimator in product(
            dict_product(transform_search), estimators)
    ]
def models(estimators, cv_search, transform_search):
    """
    Grid search prediction workflows. Used by bll6_models, test_models,
    and product_models.

    Args:
        estimators: collection of steps, each of which constructs an estimator
        cv_search: dictionary of arguments to LeadCrossValidate to search over
        transform_search: dictionary of arguments to LeadTransform to search over

    Returns:
        a list drain.model.Predict steps constructed by taking the product
        of the estimators with the the result of drain.util.dict_product on
        each of cv_search and transform_search. Each Predict step contains
        the following in its inputs graph:
            - lead.model.cv.LeadCrossValidate
            - lead.model.transform.LeadTransform
            - drain.model.Fit
    """
    steps = []
    # One workflow per (cv_args, transform_args, estimator) triple.
    for cv_args, transform_args, estimator in product(
            dict_product(cv_search), dict_product(transform_search),
            estimators):
        cv = lead.model.cv.LeadCrossValidate(**cv_args)
        cv.name = 'cv'

        # Select the training design matrix from the cv step: per the
        # MapResults mapping, cv's 'X' output becomes the object and
        # 'train' the key of a __getitem__ call; 'test' and 'aux' are
        # dropped (mapped to None).
        X_train = Call('__getitem__', inputs=[
            MapResults([cv], {'X': 'obj', 'train': 'key',
                              'test': None, 'aux': None})])

        # Mean of the training matrix (presumably per-column means used
        # as imputation values — confirm against data.impute).
        mean = Call('mean', inputs=[X_train])
        mean.name = 'mean'

        # Impute the full matrix: cv's 'X' output plus the mean step's
        # result (mapped to the 'value' argument); cv's other outputs
        # are dropped.
        X_impute = Construct(data.impute, inputs=[
            MapResults([cv], {'aux': None, 'test': None, 'train': None}),
            MapResults([mean], 'value')])

        # Recombine: the imputed matrix replaces cv's 'X' output, with
        # cv's remaining outputs passed through. Marked target=True so
        # its result is persisted.
        cv_imputed = MapResults([X_impute, cv], ['X', {'X': None}])
        cv_imputed.target = True

        transform = lead.model.transform.LeadTransform(inputs=[cv_imputed],
                                                       **transform_args)
        transform.name = 'transform'

        fit = model.Fit(inputs=[estimator, transform],
                        return_estimator=True)
        fit.name = 'fit'

        # Final step: predict with the fitted estimator on the transform's
        # output; this is the persisted target of the workflow.
        y = model.Predict(inputs=[fit, transform],
                          return_feature_importances=True)
        y.name = 'predict'
        y.target = True

        steps.append(y)

    return steps
def dapply(self, fn, pairwise=False, symmetric=True, diagonal=False,
           block=None, **kwargs):
    """
    Apply function to each step object in the index

    Args:
        fn: function to apply. If a list then each function is applied
        pairwise: whether to apply the function to pairs of steps
        symmetric, diagonal, block: passed to apply_pairwise when pairwise=True
        kwargs: a keyword arguments to pass to each function. Arguments with
            list value are grid searched using util.dict_product.

    Returns:
        a StepFrame or StepSeries
    """
    # Keyword arguments that are lists with more than one candidate are
    # the "searched" parameters; they later become column-name components.
    search_keys = [
        k for k, v in kwargs.items() if isinstance(v, list) and len(v) > 1
    ]
    functions = util.make_list(fn)
    # Full grid: every function crossed with every kwargs combination.
    search = list(product(functions, util.dict_product(kwargs)))

    results = []
    for fn, kw in search:
        if not pairwise:
            # Apply fn to each step in the index individually.
            r = self.index.to_series().apply(lambda step: fn(step, **kw))
        else:
            # Apply fn to pairs of steps via apply_pairwise.
            r = apply_pairwise(self, fn, symmetric=symmetric,
                               diagonal=diagonal, block=block, **kw)

        # Column/series name: include the function name only when multiple
        # functions were given, then the values of the searched kwargs.
        name = [] if len(functions) == 1 else [fn.__name__]
        name += util.dict_subset(kw, search_keys).values()

        if isinstance(r, pd.DataFrame):
            # Prefix each existing column with the search-point name parts.
            columns = pd.MultiIndex.from_tuples(
                [tuple(name + util.make_list(c)) for c in r.columns])
            r.columns = columns
        else:
            r.name = tuple(name)
        results.append(r)

    if len(results) > 1:
        # Multiple search points: concatenate side by side and label the
        # MultiIndex levels with the searched parameter names.
        result = pd.concat(results, axis=1)
        # get subset of parameters that were searched over
        column_names = [] if len(functions) == 1 else [None]
        column_names += search_keys
        # Pad with None for any remaining (original-column) levels.
        column_names += [None
                        ] * (len(result.columns.names) - len(column_names))
        result.columns.names = column_names
        return StepFrame(result)
    else:
        result = results[0]
        if isinstance(result, pd.DataFrame):
            return StepFrame(result)
        else:
            # Single series result: name it after the (single) function.
            result.name = functions[0].__name__
            return StepSeries(result)