def __init__(self, pathToData):
    """Configure an AdaBoost-over-decision-tree experiment on the Abalone data."""
    # Where the input data lives.
    self.dataFilePath = pathToData
    # Labels used when reporting results.
    self.algoname = 'Boosting'
    self.datasetName = 'Abalone'
    # Cross-validation fold count.
    self.cv = 5
    # Boosted ensemble built on a decision-tree base learner.
    self.baseEstimater = dtr()
    self.classifier = abr(base_estimator=self.baseEstimater)
Exemplo n.º 2
0
def get_regression_model(algo, poly_Order=2, **kwargs):
    """Build and return an (unfitted) regression model chosen by short code.

    Parameters
    ----------
    algo : str
        Algorithm code: 'XGR', 'RFR', 'ABR', 'P1R', 'P2R', 'ANN', 'ELN',
        'E2R' or 'PLS'.
    poly_Order : int, optional
        Polynomial degree used by the 'P2R' and 'E2R' pipelines.
    **kwargs
        Forwarded verbatim to the underlying estimator constructor.

    Returns
    -------
    The constructed estimator or pipeline. Prints a message and exits the
    process (SystemExit) when ``algo`` is not recognized.
    """
    ### Regression models
    ### https://stackoverflow.com/questions/12860841/python-import-in-if
    # Builders are lambdas so only the selected estimator is instantiated.
    builders = {
        'XGR': lambda: xgr(**kwargs),
        'RFR': lambda: rfr(**kwargs),
        'ABR': lambda: abr(**kwargs),
        'P1R': lambda: LinearRegression(**kwargs),
        'P2R': lambda: make_pipeline(PolynomialFeatures(poly_Order), Ridge(**kwargs)),
        'ANN': lambda: MLPRegressor(**kwargs),
        'ELN': lambda: ElasticNet(**kwargs),  # add parameters later
        'E2R': lambda: make_pipeline(PolynomialFeatures(poly_Order),
                                     ElasticNet(**kwargs)),
        'PLS': lambda: PLSRegression(**kwargs),
    }
    if algo not in builders:
        print('Algorithm has not yet been added to the menu.')
        sys.exit()

    return builders[algo]()
Exemplo n.º 3
0
    def regression(self, metric="neg_root_mean_squared_error", folds=10, alphas=None, graph=False):
        """Cross-validate a suite of regressors and display a score report.

        Parameters
        ----------
        metric : str
            sklearn scoring string. Scores are multiplied by -1 below, so a
            ``neg_*`` scorer is expected; the previous default
            'root_mean_squared_error' is not a valid sklearn scoring name.
        folds : int
            Number of KFold splits.
        alphas : list, optional
            Alpha grid forwarded to the Ridge CV regressor (defaults to []).
        graph : bool
            When True, show a boxplot comparing the models.

        Returns
        -------
        None. Candidate models are stored in ``self.models`` and the report
        is displayed.
        """
        # A mutable default ([]) would be shared across calls; build per call.
        if alphas is None:
            alphas = []
        size = 1.3 * self.report_width // 10

        # Candidate regressors, all compared under identical cross-validation.
        models = {}
        models["Linear regressor"]                  = lr()
        models["Lasso regressor"]                   = lassor()
        models["Lasso CV regressor"]                = lassocvr()
        models["Ridge regressor"]                   = rr(alpha=0, normalize=True)
        models["Ridge CV regressor"]                = rcvr(alphas = alphas)
        models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
        models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
        models["K nearest neighbors regressor K5"]  = knnr(n_neighbors=5)
        models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
        models["SGD regressor"]                     = sgdr(max_iter=10000, warm_start=True)
        models["Decision tree regressor"]           = dtr()
        models["Decision tree regressor D3"]        = dtr(max_depth=3)
        models["Random forest regressor"]           = rfr()
        models["Ada boost regressor"]               = abr()
        models["Gradient boost regressor"]          = gbr()
        models["Support vector regressor"]          = svr()
        self.models = models

        print('\n')
        print(self.report_width * '*', '\n*')
        print('* REGRESSION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
        kf = KFold(n_splits=folds)
        results = []
        names = []
        for model_name in models:
            # Negate because sklearn error scorers return negative values.
            cv_scores = -1 * cross_val_score(models[model_name], self.Xt_train, self.yt_train.values.ravel(), cv=kf, scoring=metric)
            results.append(cv_scores)
            names.append(model_name)
        print(self.report_width * '*', '')
        report = pd.DataFrame({'Regressor': names, 'Score': results})
        report['Score (avg)'] = report.Score.apply(lambda x: x.mean())
        report['Score (std)'] = report.Score.apply(lambda x: x.std())
        # Coefficient of variation (%) of the fold scores.
        report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
        report.sort_values(by='Score (avg)', inplace=True)
        report.drop('Score', axis=1, inplace=True)
        display(report)
        print('\n')
        if graph:
            fig, ax = plt.subplots(figsize=(size, 0.5 * size))
            plt.title('Regressor Comparison')
            plt.boxplot(results)
            ax.set_xticklabels(names)
            plt.xticks(rotation=45)
            plt.subplots_adjust(hspace=0.0)
            plt.show()
        return None
Exemplo n.º 4
0
def get_regression_model(algo, settings, print_mod_info=False):
    """Return an unfitted regression model selected by the code ``algo``.

    ``settings`` is a positional sequence of hyper-parameters whose meaning
    depends on the algorithm code; ``print_mod_info`` echoes the chosen model.
    Prints a message and exits the process when ``algo`` is unknown.
    """
    ### Regression models
    ### https://stackoverflow.com/questions/12860841/python-import-in-if
    # (display label, builder) per algorithm code; builders are lazy so only
    # the selected estimator is instantiated.
    catalog = {
        'XGR': ('XGBoost:',
                lambda s: xgr(n_estimators=s[0], max_depth=s[1])),
        'RFR': ('Random Forest:',
                lambda s: rfr(n_estimators=s[0])),
        'ABR': ('AdaBoost:',
                lambda s: abr(n_estimators=s[0])),
        'P1R': ('Linear:',
                lambda s: LinearRegression()),
        'P2R': ('Poly 2:',
                lambda s: make_pipeline(PolynomialFeatures(s[0]), Ridge())),
        'ANN': ('Neural Net Regression:',
                lambda s: MLPRegressor(
                    solver='lbfgs',
                    hidden_layer_sizes=(s[0], s[1]),
                    tol=s[2])),
        'ELN': ('Elastic Net Regression:',
                lambda s: ElasticNet(alpha=s[0], l1_ratio=s[1])),
        'E2R': ('Poly 2:',
                lambda s: make_pipeline(
                    PolynomialFeatures(s[0]),
                    ElasticNet(alpha=s[1], l1_ratio=s[2]))),
        'PLS': ('Partial Least Squares Regression:',
                lambda s: PLSRegression(n_components=s[0])),
    }
    if algo not in catalog:
        print('Algorithm not setup yet.')
        sys.exit()

    label, build = catalog[algo]
    mod = build(settings)
    if print_mod_info: print(label, mod)

    return mod
    def regression(self, metric, folds=10, alphas=None, printt=True, graph=False):
        """Cross-validate a suite of regressors and build a performance report.

        Parameters
        ----------
        metric : str
            sklearn scoring string. Scores are negated and square-rooted
            below, so a neg-MSE style scorer appears to be expected —
            TODO confirm with callers.
        folds : int
            Number of shuffled KFold splits.
        alphas : list, optional
            Alpha grid forwarded to the Ridge CV regressor (defaults to []).
        printt : bool
            When True, print the report.
        graph : bool
            When True, draw and keep a boxplot comparing the models.

        Returns
        -------
        None. Results land in ``self.models``, ``self.report_performance``
        and (when ``graph``) ``self.graphs_model``.
        """
        # A mutable default ([]) would be shared across calls; build per call.
        if alphas is None:
            alphas = []
        size = self.graph_width

        # significant model setup differences should be list as different models
        models = {}
        models["Linear regressor"]                  = lr()
        models["Lasso regressor"]                   = lassor()
        models["Lasso CV regressor"]                = lassocvr()
        models["Ridge regressor"]                   = rr(alpha=0, normalize=True)
        models["Ridge CV regressor"]                = rcvr(alphas = alphas)
        models["Elastic net regressor"]             = enr()
        models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
        models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
        models["K nearest neighbors regressor K5"]  = knnr(n_neighbors=5)
        models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
        models["SGD regressor"]                     = sgdr(max_iter=10000, warm_start=True)
        models["Decision tree regressor"]           = dtr()
        models["Decision tree regressor D3"]        = dtr(max_depth=3)
        models["Random forest regressor"]           = rfr()
        models["Ada boost regressor"]               = abr()
        models["Gradient boost regressor"]          = gbr()
        models["Support vector regressor RBF"]      = svr()
        models["Support vector regressor Linear"]   = svr('linear')
        models["Support vector regressor Poly"]     = svr(kernel='poly')
        self.models = models

        kf = KFold(n_splits=folds, shuffle=True)
        results = []
        names = []
        et = []
        for model_name in models:
            start = time.time()
            # Negate because sklearn error scorers return negative values.
            cv_scores = -1 * cross_val_score(models[model_name], self.Xt_train, self.yt_train, cv=kf, scoring=metric)
            results.append(cv_scores)
            names.append(model_name)
            et.append((time.time() - start))
        report = pd.DataFrame({'Model': names, 'Score': results, 'Elapsed Time': et})
        # sqrt converts MSE-style fold scores to RMSE before averaging.
        report['Score (avg)'] = report.Score.apply(lambda x: np.sqrt(x).mean())
        report['Score (std)'] = report.Score.apply(lambda x: np.sqrt(x).std())
        # Coefficient of variation (%) of the fold scores.
        report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
        report.sort_values(by='Score (avg)', inplace=True)
        report.drop('Score', axis=1, inplace=True)
        report.reset_index(inplace=True, drop=True)
        self.report_performance = report

        if printt:
            print('\n')
            print(self.report_width * '*', '\n*')
            print('* REGRESSION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
            print(self.report_width * '*', '')
            print(report)
            print('\n')

        if graph:
            fig, ax = plt.subplots(figsize=(size, 0.5 * size))
            plt.title('Regressor Comparison')
            plt.boxplot(results)
            ax.set_xticklabels(names)
            plt.xticks(rotation=45)
            plt.subplots_adjust(hspace=0.0, bottom=0.25)
            self.graphs_model.append(fig)
            plt.show()
        return None