def make_adaboost_tree_fit_pipeline():
    """Assemble a two-step pipeline ending in an AdaBoost classifier.

    The first step, named 'features', is whatever
    ``process_data.make_predictor_pipeline(do_one_hot=True)`` returns.
    The second step, named 'model', is an ``AdaBoostClassifier`` built
    around a default-constructed ``DecisionTreeClassifier``.

    Returns:
        The assembled ``sklearn.pipeline.Pipeline``.
    """
    feature_step = (
        'features',
        process_data.make_predictor_pipeline(do_one_hot=True),
    )
    model_step = (
        'model',
        sklearn.ensemble.AdaBoostClassifier(
            sklearn.tree.DecisionTreeClassifier()
        ),
    )
    return sklearn.pipeline.Pipeline([feature_step, model_step])
def make_bagging_tree_fit_pipeline():
    """Assemble a two-step pipeline ending in a bagging classifier.

    Step "features" comes from
    ``process_data.make_predictor_pipeline(do_one_hot=True)``; step
    "model" is a ``BaggingClassifier`` given a default-constructed
    ``DecisionTreeClassifier`` as its first positional argument.

    Returns:
        The assembled ``sklearn.pipeline.Pipeline``.
    """
    preprocessor = process_data.make_predictor_pipeline(do_one_hot=True)
    # Passed positionally, exactly as the classifier was originally built.
    classifier = sklearn.ensemble.BaggingClassifier(
        sklearn.tree.DecisionTreeClassifier()
    )
    steps = [
        ("features", preprocessor),
        ("model", classifier),
    ]
    return sklearn.pipeline.Pipeline(steps)
def main(argv):
    """Load a data file, preprocess it, and pass the result to display_data.

    Args:
        argv: Command-line style argument list. When it has more than one
            element, ``argv[1]`` is used as the data file path; otherwise
            the path defaults to ``'a.csv'``.

    Returns:
        None. If the path does not exist, a message is printed and nothing
        else happens. Otherwise the data is read with
        ``process_data.get_data``, predictors and labels are produced by
        calling ``fit_transform`` on the pipelines returned by
        ``process_data.make_predictor_pipeline(do_one_hot=False)`` and
        ``process_data.make_label_pipeline()``, and both are forwarded to
        ``display_data`` together with the file path minus its extension.
    """
    filename = argv[1] if len(argv) > 1 else 'a.csv'

    if not os.path.exists(filename):
        print(f"{filename} doesn't exist.")
        return

    # os.path.splitext strips only the final extension, so paths with
    # several dots ("./data/a.csv", "a.b.csv") or with none ("datafile")
    # no longer raise ValueError the way a two-way unpack of
    # filename.split('.') did. For a plain "name.ext" the result is the same.
    basename, _ = os.path.splitext(filename)

    data = process_data.get_data(filename)
    predictor_pipeline = process_data.make_predictor_pipeline(
        do_one_hot=False)
    label_pipeline = process_data.make_label_pipeline()

    predictors_processed = predictor_pipeline.fit_transform(data)
    labels_processed = label_pipeline.fit_transform(data)

    display_data(predictors_processed, labels_processed, basename)
    return
def make_svm_fit_pipeline():
    """Assemble a two-step pipeline ending in a support vector classifier.

    Step "features" comes from
    ``process_data.make_predictor_pipeline(do_one_hot=True)``; step
    "model" is an ``sklearn.svm.SVC`` constructed with no arguments.

    Returns:
        The assembled ``sklearn.pipeline.Pipeline``.
    """
    return sklearn.pipeline.Pipeline([
        ("features", process_data.make_predictor_pipeline(do_one_hot=True)),
        ("model", sklearn.svm.SVC()),
    ])