Exemplo n.º 1
0
def calibration():
    """Build metric-printing workflows over a small calibration grid.

    One workflow is assembled for every pairing of ``n_estimators``
    (50, 150, 250) with a ``cv`` fold count (2 or 5).  Each workflow fits a
    random forest, predicts with it, feeds the prediction step into a
    ``CalibratedClassifierCV`` construction, fit-predicts the calibrator and
    finally prints metrics for the calibrated output.

    Returns:
        list: one ``model.PrintMetrics`` step per grid point, in grid order.
    """
    grid = product(range(50, 300, 100), [2, 5])

    workflows = []
    for n_trees, n_folds in grid:
        dataset = data.ClassificationData(
            target=True, n_samples=1000, n_features=100)

        forest = step.Construct(
            'sklearn.ensemble.RandomForestClassifier',
            n_estimators=n_trees,
            name='estimator')

        uncalibrated = model.Fit(
            inputs=[forest, dataset],
            return_estimator=True,
            target=True,
            name='uncalibrated')
        scores = model.Predict(
            inputs=[uncalibrated, dataset], target=True, name='y')

        # NOTE(review): the {'y': None} mapping presumably hides the 'y'
        # result of the predict step from the calibrator's constructor --
        # confirm against the step library's inputs_mapping semantics.
        calibrator = step.Construct(
            'sklearn.calibration.CalibratedClassifierCV',
            cv=n_folds,
            inputs=[scores],
            inputs_mapping={'y': None},
            name='calibrator')

        calibrated = model.FitPredict(
            inputs=[calibrator, dataset], target=True, name='calibrated')

        # Baseline plus precision at three cutoffs.
        metric_specs = [{'metric': 'baseline'}]
        metric_specs.extend(
            {'metric': 'precision', 'k': cutoff}
            for cutoff in (100, 200, 300))

        workflows.append(
            model.PrintMetrics(metric_specs, inputs=[calibrated]))

    return workflows
Exemplo n.º 2
0
def calibration():
    """Build calibrated fit-predict workflows over a small parameter grid.

    One workflow is assembled for every pairing of ``n_estimators``
    (50, 150, 250) with a ``cv`` fold count (2 or 5).  The data, fit,
    predict and calibrated fit-predict steps are each flagged with
    ``target = True``.

    Returns:
        list: one ``model.FitPredict`` step per grid point, in grid order.
    """
    workflows = []
    for n_trees, n_folds in product(range(50, 300, 100), [2, 5]):
        dataset = data.ClassificationData(n_samples=1000, n_features=100)
        dataset.target = True

        forest = step.Call(
            ensemble, 'RandomForestClassifier', n_estimators=n_trees)

        uncalibrated = model.Fit(
            inputs=[forest, dataset], return_estimator=True)
        uncalibrated.target = True

        scores = model.Predict(inputs=[uncalibrated, dataset])
        scores.target = True

        # NOTE(review): mapping 'y' to None presumably drops that result
        # before it reaches the calibrator call -- confirm with MapResults.
        remapped_scores = MapResults([scores], {'y': None})
        calibrator = step.Call(
            'sklearn.calibration.CalibratedClassifierCV',
            cv=n_folds,
            inputs=[remapped_scores])

        calibrated = model.FitPredict(inputs=[calibrator, dataset])
        calibrated.target = True

        workflows.append(calibrated)

    return workflows
Exemplo n.º 3
0
def n_estimators_search():
    """Build predict steps for random forests with 1, 2 and 3 estimators.

    All workflows share a single ``ClassificationData`` step (flagged as a
    target); each gets its own estimator construction, fit and predict step.

    Returns:
        list: the ``model.Predict`` steps, ordered by ``n_estimators``, each
        with ``target`` set to True.
    """
    dataset = data.ClassificationData(n_samples=1000, n_features=100)
    dataset.target = True

    def _predict_with(n_trees):
        # Construct -> fit -> predict chain for one forest size.
        forest = step.Construct(
            _class='sklearn.ensemble.RandomForestClassifier',
            n_estimators=n_trees)
        fitted = model.Fit(
            inputs=[forest, dataset],
            return_estimator=True,
            return_feature_importances=True)
        predicted = model.Predict(inputs=[fitted, dataset])
        predicted.target = True
        return predicted

    return [_predict_with(n_trees) for n_trees in range(1, 4)]
Exemplo n.º 4
0
def prediction():
    """Assemble a single construct -> fit -> predict workflow.

    Returns:
        The ``model.Predict`` step at the end of the chain, with ``target``
        set to True.
    """
    # Data step: 1000 samples, 100 features (per the original author's note
    # it also carries a training/test split).
    dataset = data.ClassificationData(n_samples=1000, n_features=100)
    dataset.target = True

    # Random forest construction with a single estimator; explicitly not a
    # target.
    forest = step.Construct(
        _class='sklearn.ensemble.RandomForestClassifier', n_estimators=1)
    forest.target = False

    # Fit step, asked to return the estimator and feature importances.
    fit_inputs = [forest, dataset]
    fitted = model.Fit(
        inputs=fit_inputs,
        return_estimator=True,
        return_feature_importances=True)

    # Predict step consuming the fit and the same data step.
    predict_inputs = [fitted, dataset]
    predicted = model.Predict(inputs=predict_inputs)
    predicted.target = True
    return predicted
Exemplo n.º 5
0
def models(estimators, cv_search, transform_search):
    """Build grid-search prediction workflows.

    Used by bll6_models, test_models, and product_models.

    Args:
        estimators: collection of steps, each of which constructs an
            estimator.
        cv_search: dictionary of arguments to LeadCrossValidate to search
            over.
        transform_search: dictionary of arguments to LeadTransform to search
            over.

    Returns:
        A list of drain.model.Predict steps, one for every element of the
        product of drain.util.dict_product(cv_search),
        drain.util.dict_product(transform_search) and the estimators.

        Each Predict step contains the following in its inputs graph:
            - lead.model.cv.LeadCrossValidate
            - lead.model.transform.LeadTransform
            - drain.model.Fit
    """
    cv_grid = dict_product(cv_search)
    transform_grid = dict_product(transform_search)

    predictions = []
    for cv_kwargs, transform_kwargs, estimator in product(
            cv_grid, transform_grid, estimators):

        cross_validation = lead.model.cv.LeadCrossValidate(**cv_kwargs)
        cross_validation.name = 'cv'

        # NOTE(review): presumably renames the cv results so the call below
        # amounts to X.__getitem__(train) -- confirm against MapResults/Call.
        getitem_args = MapResults(
            [cross_validation],
            {
                'X': 'obj',
                'train': 'key',
                'test': None,
                'aux': None
            })
        train_features = Call('__getitem__', inputs=[getitem_args])

        train_mean = Call('mean', inputs=[train_features])
        train_mean.name = 'mean'

        # Inputs for data.impute: the cv results with 'aux', 'test' and
        # 'train' mapped to None, plus the mean step's result under 'value'.
        impute_source = MapResults(
            [cross_validation],
            {
                'aux': None,
                'test': None,
                'train': None
            })
        impute_value = MapResults([train_mean], 'value')
        imputed_features = Construct(
            data.impute, inputs=[impute_source, impute_value])

        # Imputed result mapped to 'X'; the cv step's own 'X' mapped to None.
        imputed_cv = MapResults(
            [imputed_features, cross_validation], ['X', {'X': None}])
        imputed_cv.target = True

        transformed = lead.model.transform.LeadTransform(
            inputs=[imputed_cv], **transform_kwargs)
        transformed.name = 'transform'

        fitted = model.Fit(
            inputs=[estimator, transformed], return_estimator=True)
        fitted.name = 'fit'

        predicted = model.Predict(
            inputs=[fitted, transformed], return_feature_importances=True)
        predicted.name = 'predict'
        predicted.target = True

        predictions.append(predicted)

    return predictions