def calibration():
    """Build calibrated random-forest pipelines over a small parameter grid.

    One pipeline is created for every pairing of ``n_estimators`` drawn from
    ``range(50, 300, 100)`` with ``k_folds`` drawn from ``[2, 5]``. Each
    pipeline fits a ``RandomForestClassifier``, predicts with it, wraps the
    prediction step in a ``CalibratedClassifierCV`` (``cv=k_folds``) and
    finally runs ``model.FitPredict`` on the calibrator.

    Returns:
        list: the final ``model.FitPredict`` step of each pipeline, in grid
        order (estimator count varies slowest).
    """
    pipelines = []
    # Nested loops visit the grid in the same order as a cartesian product
    # of the two sequences.
    for n_trees in range(50, 300, 100):
        for folds in [2, 5]:
            dataset = data.ClassificationData(n_samples=1000, n_features=100)
            dataset.target = True

            forest = step.Call(
                ensemble,
                'RandomForestClassifier',
                n_estimators=n_trees,
            )

            fitted = model.Fit(inputs=[forest, dataset], return_estimator=True)
            fitted.target = True

            predicted = model.Predict(inputs=[fitted, dataset])
            predicted.target = True

            # NOTE(review): the {'y': None} mapping presumably hides the 'y'
            # result of the prediction step from the calibrator -- confirm
            # against MapResults.
            remapped = MapResults([predicted], {'y': None})
            calibrator = step.Call(
                'sklearn.calibration.CalibratedClassifierCV',
                cv=folds,
                inputs=[remapped],
            )

            calibrated = model.FitPredict(inputs=[calibrator, dataset])
            calibrated.target = True

            pipelines.append(calibrated)
    return pipelines
def calibration():
    """Build calibrated random-forest pipelines that end in a metrics step.

    For every ``(n_estimators, k_folds)`` pair in the product of
    ``range(50, 300, 100)`` and ``[2, 5]`` this creates: a classification
    dataset, a ``RandomForestClassifier`` construction step, a fit step
    (named ``'uncalibrated'``), a predict step (named ``'y'``), a
    ``CalibratedClassifierCV`` construction step fed by the predictions, a
    fit-predict step (named ``'calibrated'``) and a ``model.PrintMetrics``
    step reporting a baseline plus precision at k = 100, 200 and 300.

    Returns:
        list: one ``model.PrintMetrics`` step per grid point.
    """
    metric_steps = []
    for n_trees, folds in product(range(50, 300, 100), [2, 5]):
        dataset = data.ClassificationData(
            target=True,
            n_samples=1000,
            n_features=100,
        )
        forest = step.Construct(
            'sklearn.ensemble.RandomForestClassifier',
            n_estimators=n_trees,
            name='estimator',
        )
        uncalibrated = model.Fit(
            inputs=[forest, dataset],
            return_estimator=True,
            target=True,
            name='uncalibrated',
        )
        scores = model.Predict(
            inputs=[uncalibrated, dataset],
            target=True,
            name='y',
        )
        calibrator = step.Construct(
            'sklearn.calibration.CalibratedClassifierCV',
            cv=folds,
            inputs=[scores],
            inputs_mapping={'y': None},
            name='calibrator',
        )
        calibrated = model.FitPredict(
            inputs=[calibrator, dataset],
            target=True,
            name='calibrated',
        )

        # Built fresh on every iteration so each metrics step owns its list.
        requested = [{'metric': 'baseline'}]
        for cutoff in (100, 200, 300):
            requested.append({'metric': 'precision', 'k': cutoff})

        metric_steps.append(model.PrintMetrics(requested, inputs=[calibrated]))
    return metric_steps
def test_to_hdf():
    """Check that ``data.ToHDF`` returns results equal to those of its input.

    A ``ClassificationData`` step is fed into a ``ToHDF`` step, which is set
    up for dumping and executed. Every key present in the input step's
    result must then map to an equal object in the ``ToHDF`` result.
    """
    source = data.ClassificationData()
    hdf_step = data.ToHDF(inputs=[source], target=True)
    hdf_step.setup_dump()
    hdf_step.execute()

    # Fetch the HDF result first, then the source result.
    stored = hdf_step.get_result()
    expected = source.get_result()

    for name in expected.keys():
        assert stored[name].equals(expected[name])
def product_model():
    """Build a ``model.PredictProduct`` step over two random-forest models.

    Both models (named ``'m1'`` and ``'m2'``) are ``model.FitPredict`` steps
    sharing the same ``RandomForestClassifier`` construction step
    (``n_estimators=10``) and the same classification dataset.

    Returns:
        The ``model.PredictProduct`` step, named ``'p'``.
    """
    dataset = data.ClassificationData(
        target=True,
        n_samples=1000,
        n_features=100,
    )
    forest = step.Construct(
        'sklearn.ensemble.RandomForestClassifier',
        n_estimators=10,
        name='estimator',
    )

    # Constructed in order m1, m2; each gets its own inputs list.
    members = [
        model.FitPredict(inputs=[forest, dataset], target=True, name=label)
        for label in ('m1', 'm2')
    ]

    return model.PredictProduct(
        inputs=members,
        target=True,
        inputs_mapping=['m1', 'm2'],
        name='p',
    )
def n_estimators_search():
    """Build prediction steps for random forests with 1, 2 and 3 estimators.

    All forests share a single classification dataset (1000 samples, 100
    features). Each forest is fitted with ``return_estimator`` and
    ``return_feature_importances`` enabled, and its predict step is marked
    with ``target = True``.

    Returns:
        list: the ``model.Predict`` steps, ordered by increasing
        ``n_estimators``.
    """
    dataset = data.ClassificationData(n_samples=1000, n_features=100)
    dataset.target = True

    predictions = []
    for n_trees in range(1, 4):
        forest = step.Construct(
            _class='sklearn.ensemble.RandomForestClassifier',
            n_estimators=n_trees,
        )
        fitted = model.Fit(
            inputs=[forest, dataset],
            return_estimator=True,
            return_feature_importances=True,
        )
        predicted = model.Predict(inputs=[fitted, dataset])
        predicted.target = True
        predictions.append(predicted)
    return predictions
def prediction():
    """Build a single random-forest prediction pipeline.

    Returns:
        The ``model.Predict`` step, marked with ``target = True``.
    """
    # Data step; per the original note it includes a train/test split.
    dataset = data.ClassificationData(n_samples=1000, n_features=100)
    dataset.target = True

    # Random forest estimator with a single tree; explicitly not a target.
    forest = step.Construct(
        _class='sklearn.ensemble.RandomForestClassifier',
        n_estimators=1,
    )
    forest.target = False

    # Fit step, also returning the estimator and feature importances.
    fitted = model.Fit(
        inputs=[forest, dataset],
        return_estimator=True,
        return_feature_importances=True,
    )

    # Prediction step: the output of this pipeline.
    predicted = model.Predict(inputs=[fitted, dataset])
    predicted.target = True
    return predicted
def product_model():
    """Build a ``model.PredictProduct`` step over two random-forest models.

    The two ``model.FitPredict`` steps (named ``'m1'`` and ``'m2'``) share
    one ``RandomForestClassifier`` call step (``n_estimators=10``) and one
    classification dataset. Their results are passed to ``PredictProduct``
    through ``MapResults`` with the mapping ``['m1', 'm2']``.

    Returns:
        The ``model.PredictProduct`` step, named ``'p'``.
    """
    dataset = data.ClassificationData(n_samples=1000, n_features=100)
    dataset.target = True

    forest = step.Call(ensemble, 'RandomForestClassifier', n_estimators=10)
    forest.name = 'estimator'

    # Same order as writing the two models out by hand: construct, mark as
    # target, then name -- first for m1, then for m2.
    members = []
    for label in ('m1', 'm2'):
        member = model.FitPredict(inputs=[forest, dataset])
        member.target = True
        member.name = label
        members.append(member)

    combined = model.PredictProduct(
        inputs=[MapResults(members, ['m1', 'm2'])],
    )
    combined.target = True
    combined.name = 'p'
    return combined
def product_model():
    """Build a ``model.PredictProduct`` step over two random-forest models.

    The two ``model.FitPredict`` steps (named ``'m1'`` and ``'m2'``) share
    one ``RandomForestClassifier`` construction step (``n_estimators=10``)
    and one classification dataset. They are handed to ``PredictProduct``
    with ``inputs_mapping=['m1', 'm2']``.

    Returns:
        The ``model.PredictProduct`` step, named ``'p'``.
    """
    dataset = data.ClassificationData(n_samples=1000, n_features=100)
    dataset.target = True

    forest = step.Construct(
        _class='sklearn.ensemble.RandomForestClassifier',
        n_estimators=10,
    )
    forest.name = 'estimator'

    # Same order as writing the two models out by hand: construct, mark as
    # target, then name -- first for m1, then for m2.
    members = []
    for label in ('m1', 'm2'):
        member = model.FitPredict(inputs=[forest, dataset])
        member.target = True
        member.name = label
        members.append(member)

    combined = model.PredictProduct(
        inputs=members,
        inputs_mapping=['m1', 'm2'],
    )
    combined.target = True
    combined.name = 'p'
    return combined