Example no. 1
0
    def ridgereport(self, folds, cores, plotit, saveit):
        """Grid-search an L2-regularized model and produce the shared report.

        For 'regress' this tunes sklearn Ridge via the modelutilities
        pipeline/grid builders; for 'classify' it tunes a LogisticRegression
        (L2 penalty is ridge-like) in a hand-built pipeline.

        Args:
            folds: number of cross-validation folds (stored on self).
            cores: number of worker processes for the search (stored on self).
            plotit: whether clfreport should plot results.
            saveit: whether clfreport should persist results.

        Returns:
            Whatever TuneModel.clfreport returns (the fitted report/model).
        """
        self.folds = folds
        self.cores = cores

        if self.modeltype == 'classify':

            self.pipeline = Pipeline([("imputer", Imputer(axis=0)),
                                      # Todo: get Logistic feature selection working
                                      ("randlogit", RandomizedLogisticRegression()),
                                      ("logit", LogisticRegression())
                                      ])

            # Bug fix: baseparam was a malformed *string* literal and
            # self.parameters was never assigned in this branch, so the grid
            # search would have used stale parameters from a previous call.
            # Grid the imputer strategy too, matching the other hand-built
            # pipelines in this class (treereport / svmreport).
            self.parameters = {'logit__C': np.logspace(-2, 2, 10),
                               'imputer__strategy': ('mean', 'median', 'most_frequent')}

        elif self.modeltype == 'regress':

            algorithm = Ridge()

            self.pipeline = modelutilities.buildclfpipeline(algorithm, self.impute, self.modeltype)

            baseparam = {'regress__alpha': np.logspace(-2, 2, 10),
                         'regress__normalize': (True, False)}

            self.parameters = modelutilities.buildgridparameters(baseparam, self.impute, self.modeltype)

        # Todo: make plotting symmetric between regress/class
        # Fit/Predict, run report, and return fit
        return TuneModel.clfreport(self, printsample=True, plotit=plotit, saveit=saveit, title="RidgeRegression")
Example no. 2
0
    def randomforestreport(self, folds, cores, plotit, saveit):
        """Grid-search a random forest and produce the shared report.

        Chooses RandomForestClassifier or RandomForestRegressor based on
        self.modeltype, builds the pipeline and parameter grid through
        modelutilities, then delegates to TuneModel.clfreport.
        """
        self.folds = folds
        self.cores = cores

        estimator = None

        if self.modeltype == 'classify':
            estimator = RandomForestClassifier()
            grid = {'classify__n_estimators': [10, 50, 100, 250, 500]}
        elif self.modeltype == "regress":
            # todo: Add auto feature selection into auto pipeline creation
            estimator = RandomForestRegressor()
            grid = {'regress__n_estimators': [10, 50, 100, 250, 500]}

        if estimator is not None:
            self.pipeline = modelutilities.buildclfpipeline(estimator, self.impute, self.modeltype)
            self.parameters = modelutilities.buildgridparameters(grid, self.impute, self.modeltype)

        # Fit/Predict, run report, and return fit
        return TuneModel.clfreport(self, printsample=True, plotit=plotit, saveit=saveit, title="RandomForest")
Example no. 3
0
    def logitreport(self, folds, cores, plotit, saveit):
        """Grid-search a linear model and produce the shared report.

        Uses LogisticRegression (regularization-strength grid) when
        self.modeltype is 'classify' and LinearRegression (normalize grid)
        when it is 'regress', then delegates to TuneModel.clfreport.
        """
        self.folds = folds
        self.cores = cores

        estimator = None

        if self.modeltype == 'classify':
            estimator = LogisticRegression()
            grid = {'classify__C': np.logspace(-2, 2, 10)}
        elif self.modeltype == 'regress':
            estimator = LinearRegression()
            grid = {'regress__normalize': (False, True)}

        if estimator is not None:
            self.pipeline = modelutilities.buildclfpipeline(estimator, self.impute, self.modeltype)
            self.parameters = modelutilities.buildgridparameters(grid, self.impute, self.modeltype)

        # Todo: make plotting symmetric between regress/class
        # Todo: make plotit, saveit False everywhere by default (such that they're optional parameters)
        # Fit/Predict, run report, and return fit
        return TuneModel.clfreport(self, printsample=True, plotit=plotit, saveit=saveit, title="LinearRegression")
Example no. 4
0
    def treereport(self, folds, cores, plotit, saveit):
        """Grid-search a decision tree and produce the shared report.

        The 'classify' branch builds its pipeline by hand (impute, then
        median-threshold feature selection with a LinearSVC, then the tree);
        the 'regress' branch goes through the modelutilities builders.
        """
        self.folds = folds
        self.cores = cores

        if self.modeltype == 'classify':
            steps = [("imputer", Imputer(axis=0)),
                     ("feature_selection",
                      SelectFromModel(LinearSVC(), threshold="median")),
                     ("trees", DecisionTreeClassifier())]
            self.pipeline = Pipeline(steps)

            title = "Decision_Tree_Classifier"  # NOTE(review): shadowed by the hardcoded title below

            # Parameter grid explored by cross-validation.
            self.parameters = {'trees__criterion': ["gini", "entropy"],
                               'trees__class_weight': ["balanced"],
                               'imputer__strategy': ('mean', 'median', 'most_frequent')}

        elif self.modeltype == "regress":
            regressor = DecisionTreeRegressor()
            self.pipeline = modelutilities.buildclfpipeline(regressor, self.impute, self.modeltype)

            grid = {'regress__splitter': ('best', 'random')}
            self.parameters = modelutilities.buildgridparameters(grid, self.impute, self.modeltype)

        # Fit/Predict, run report, and return fit
        return TuneModel.clfreport(self, printsample=True, plotit=plotit, saveit=saveit, title="DecisionTree")
Example no. 5
0
    def gradboostreport(self, folds, cores, plotit, saveit):
        """Grid-search a gradient-boosted model and produce the shared report.

        Only the 'regress' branch is implemented (GradientBoostingRegressor
        with loss and n_estimators grids); the 'classify' branch is still a
        stub. Delegates to TuneModel.clfreport either way.
        """
        self.folds = folds
        self.cores = cores

        if self.modeltype == 'classify':
            # todo: set up classifier gridsearch
            pass

        elif self.modeltype == "regress":
            booster = GradientBoostingRegressor()
            self.pipeline = modelutilities.buildclfpipeline(booster, self.impute, self.modeltype)

            grid = {'regress__loss': ['ls', 'lad'],
                    # 'regress__learning_rate': [0.05, 0.1],
                    'regress__n_estimators': [75, 100, 250, 350, 500]}
            self.parameters = modelutilities.buildgridparameters(grid, self.impute, self.modeltype)

        # Fit/Predict, run report, and return fit
        return TuneModel.clfreport(self, printsample=True, plotit=plotit, saveit=saveit, title="GradientBoosting")
Example no. 6
0
    def svmreport(self, folds, cores, plotit, saveit):
        """Grid-search a linear SVM and produce the shared report.

        The 'classify' branch builds its pipeline by hand (impute, then
        median-threshold feature selection with a LinearSVC, then LinearSVC);
        the 'regress' branch tunes LinearSVR via the modelutilities builders.
        """
        self.folds = folds
        self.cores = cores

        if self.modeltype == 'classify':
            steps = [("imputer", Imputer(axis=0)),
                     ("feature_selection",
                      SelectFromModel(LinearSVC(), threshold="median")),
                     ("lsvc", LinearSVC())]
            self.pipeline = Pipeline(steps)

            # Parameter grid explored by cross-validation.
            self.parameters = {'imputer__strategy': ('mean', 'median', 'most_frequent'),
                               'lsvc__C': np.logspace(-2, 2, 10)}

            title = "Linear_SVC"  # NOTE(review): shadowed by the hardcoded title below

        elif self.modeltype == "regress":
            svr = LinearSVR()
            self.pipeline = modelutilities.buildclfpipeline(svr, self.impute, self.modeltype)

            grid = {'regress__C': np.logspace(-2, 2, 10)}
            self.parameters = modelutilities.buildgridparameters(grid, self.impute, self.modeltype)

        # todo: fix plt.show() stopping program execution
        # Fit/Predict, run report, and return fit
        return TuneModel.clfreport(self, printsample=True, plotit=plotit, saveit=saveit, title="SVM")