def test_partial_dependence_no_shadowing():
    # Non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/15842
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=FutureWarning)
        from sklearn.inspection.partial_dependence import partial_dependence as _  # noqa

        # Calling all_estimators() also triggers a recursive import of all
        # submodules, including deprecated ones.
        all_estimators()

    from sklearn.inspection import partial_dependence
    assert isinstance(partial_dependence, types.FunctionType)
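# For context, a minimal sketch (assuming scikit-learn >= 0.22, where the helper
# lives in sklearn.utils): all_estimators() returns a list of (name, class)
# pairs, sorted by name, optionally filtered by estimator type.
from sklearn.utils import all_estimators

for name, Estimator in all_estimators(type_filter="classifier")[:5]:
    print(name, "->", Estimator.__module__)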
    def fit(self, train_x, train_y, folds=3):
        estimators = all_estimators(type_filter="classifier")
        for name, ClassifierClass in estimators:
            if name in model_param_map:
                param_grid = model_param_map[name]
                grid_clf = GridSearchCV(
                    ClassifierClass(),
                    param_grid,
                    cv=folds,
                    scoring="accuracy",
                    verbose=0,
                    n_jobs=-1,
                )
                start = time.time()
                grid_clf.fit(train_x, train_y)
                end = time.time()
                clf = SimpleClassifier()
                clf.metrics["Training Accuracy"] = grid_clf.best_score_
                pred_y = grid_clf.predict(train_x)
                clf.metrics["Jaccard Score"] = jaccard_score(train_y,
                                                             pred_y,
                                                             average="macro")
                clf.metrics["F1 Score"] = f1_score(train_y,
                                                   pred_y,
                                                   average="macro")
                clf.sk_model = grid_clf.best_estimator_
                clf.name = name
                clf.attributes = grid_clf.best_params_
                clf.train_duration = grid_clf.refit_time_
                clf.gridsearch_duration = end - start
                self.ranked_list.append(clf)
        metrik = lambda clf: clf.metrics[self.metric]
        self.ranked_list.sort(reverse=True, key=metrik)
    def quick_test(self, filter_type="classifier", max_threads=5, save=True):
        print("*Quick test for multiple classification models!")
        threads = []
        for name, estimator_class in all_estimators(filter_type):
            print(f"*start training: {name} model.")
            try:
                model = estimator_class()
                thread = TrainModelThread(
                    self.train_X,
                    self.train_y,
                    self.test_X,
                    self.test_y,
                    model,
                    filter_type,
                    name,
                    save,
                )
                threads.append(thread)
                thread.start()
            except Exception:
                print(f"*Failed to initialize model: {name}.")

        for thread in threads:
            thread.join()
        print("*Training of all classification models are finished!")
Example No. 4
def Cvalidation(iris_data):

    y = iris_data.loc[:, "Name"]
    x = iris_data.loc[:, ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"]]

    # Get every classifier algorithm
    warnings.filterwarnings("ignore")
    allAlgorithms = all_estimators(type_filter="classifier")

    # Object for K-fold cross-validation
    kfold_cv = KFold(n_splits=5, shuffle=True)

    for (name, algorithm) in allAlgorithms:
        try:
            # Create an object for each algorithm
            if name == "LinearSVC":
                clf = algorithm(max_iter=10000)
            else:
                clf = algorithm()

            # Only consider classes that have a score method
            if hasattr(clf, "score"):
                # Run cross-validation
                scores = cross_val_score(clf, x, y, cv=kfold_cv)
                print(name, "accuracy =")
                print(scores)
        except Exception:
            pass
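# Usage sketch: the CSV used by the original script is not shown, so this
# builds a DataFrame with the column names Cvalidation expects from the
# built-in iris dataset. The same frame also fits Salgorithm below.
import pandas as pd
from sklearn.datasets import load_iris

iris = load_iris()
df = pd.DataFrame(iris.data, columns=["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"])
df["Name"] = [iris.target_names[t] for t in iris.target]
Cvalidation(df)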
Example No. 5
def Salgorithm(iris_data):

    # Split the iris data into labels and input features
    y = iris_data.loc[:, "Name"]
    x = iris_data.loc[:, ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"]]

    # Split into training and test sets
    x_train, X_test, y_train, Y_test = train_test_split(x, y, test_size=0.2, train_size=0.8, shuffle=True)

    # Get every classifier algorithm
    warnings.filterwarnings("ignore")
    allAlgorithms = all_estimators(type_filter="classifier")

    for (name, algorithm) in allAlgorithms:
        try:
            # Create an object for each algorithm
            clf = algorithm()

            # Train and evaluate
            clf.fit(x_train, y_train)
            y_pred = clf.predict(X_test)
            print(name, "accuracy =", accuracy_score(Y_test, y_pred))

        # Show the contents of any Warning or Exception
        except Warning as w:
            print("\033[33m" + "Warning:" + "\033[0m", name, ":", w.args)
        except Exception as e:
            # print("\033[31m" + "Error:" + "\033[0m", name, ":", e.args)
            pass
Example No. 6
    def fit(self, train_x, train_y, folds=3):
        """
        Trains all regressors from parameter grid by running model algorithm search.

        Creates a ranked list of models based on selected scoring metric.

        Parameters
        ----------
        train_x : numpy.ndarray
            The features for training regression model
        train_y : numpy.ndarray
            The corresponding label for feature array
        folds : int, optional
            The number of folds for cross validation
        """

        estimators = all_estimators(type_filter="regressor")
        with tqdm(
                total=(len(model_param_map)),
                desc="Creating Regressor List",
                unit=" Regressor",
                ncols=100,
        ) as progressbar:
            for name, RegressionClass in estimators:
                if name in model_param_map:
                    param_grid = model_param_map[name]
                    grid_rgr = GridSearchCV(
                        RegressionClass(),
                        param_grid,
                        cv=folds,
                        scoring="neg_root_mean_squared_error",
                        verbose=0,
                        n_jobs=-1,
                        error_score="raise",
                    )
                    progressbar.update(1)
                    start = time.time()
                    try:
                        grid_rgr.fit(train_x, train_y)
                    except BaseException as error:
                        self.logger.warning(
                            f"{name} failed with error: {error}.")
                        continue
                    end = time.time()
                    rgr = SimpleRegressor()
                    rgr.metrics["Training Score"] = -grid_rgr.best_score_
                    pred_y = grid_rgr.predict(train_x)
                    rgr.metrics["Mean Absolute Error"] = mean_absolute_error(
                        train_y, pred_y)
                    rgr.metrics["Mean Squared Error"] = mean_squared_error(
                        train_y, pred_y)
                    rgr.metrics["R-Squared"] = r2_score(train_y, pred_y)
                    rgr.sk_model = grid_rgr.best_estimator_
                    rgr.name = name
                    rgr.attributes = grid_rgr.best_params_
                    rgr.train_duration = grid_rgr.refit_time_
                    rgr.gridsearch_duration = end - start
                    self.ranked_list.append(rgr)
        metrik = lambda rgr: rgr.metrics[self.metric]
        self.ranked_list.sort(reverse=False, key=metrik)
Example No. 7
    def fit(self, train_x, train_y, folds=3):
        estimators = all_estimators(type_filter="classifier")
        for name, ClassifierClass in estimators:
            if name in model_param_map:
                param_grid = model_param_map[name]
                grid_clf = GridSearchCV(
                    ClassifierClass(),
                    param_grid,
                    cv=folds,
                    scoring="accuracy",
                    verbose=0,
                    n_jobs=-1,
                )
                start = time.time()
                grid_clf.fit(train_x, train_y)
                end = time.time()
                if grid_clf.best_score_ > self.metrics.get(
                        "Training Accuracy", 0.0):
                    self.metrics["Training Accuracy"] = grid_clf.best_score_
                    pred_y = grid_clf.predict(train_x)
                    self.metrics["Jaccard Score"] = jaccard_score(
                        train_y, pred_y, average="macro")
                    self.metrics["F1 Score"] = f1_score(train_y,
                                                        pred_y,
                                                        average="macro")
                    self.sk_model = grid_clf.best_estimator_
                    self.name = name
                    self.attributes = grid_clf.best_params_
                    self.train_duration = grid_clf.refit_time_
                    self.gridsearch_duration = end - start
    def quick_test(self, filter_type="classifier", max_threads=5, save=True):
        label_df = pd.read_csv("data/train_label.csv",
                               index_col="arrival_date")
        print("*Quick test for multiple classification models!")
        threads = []
        for name, estimator_class in all_estimators(filter_type):
            print(f"*start training: {name} model.")
            try:
                model = estimator_class()
                thread = TrainModelThread2(
                    self.X_df.copy(),
                    self.X_train.copy(),
                    self.y_train.copy(),
                    self.X_test.copy(),
                    self.y_test.copy(),
                    label_df.copy(),
                    model,
                    filter_type,
                    name,
                    save,
                )
                threads.append(thread)
                thread.start()
                if len(threads) >= max_threads:
                    break
            except Exception:
                print(f"*Failed to initialize model: {name}.")

        for thread in threads:
            thread.join()
        print("*Training of all classification models are finished!")
Example No. 9
def _all_estimators():
    try:
        from sklearn.utils import all_estimators

        return all_estimators()
    except ImportError:
        return _backported_all_estimators()
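# Usage sketch for the fallback helper above: count the discovered estimators
# by type, assuming _all_estimators() returns (name, class) pairs like
# sklearn.utils.all_estimators does.
from collections import Counter
from sklearn.base import ClassifierMixin, RegressorMixin, TransformerMixin

counts = Counter()
for name, cls in _all_estimators():
    if issubclass(cls, ClassifierMixin):
        counts["classifier"] += 1
    if issubclass(cls, RegressorMixin):
        counts["regressor"] += 1
    if issubclass(cls, TransformerMixin):
        counts["transformer"] += 1
print(counts)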
Example No. 10
def test_all_estimator_no_base_class():
    # test that all_estimators doesn't find abstract classes.
    for name, Estimator in all_estimators():
        msg = (
            "Base estimators such as {0} should not be included in all_estimators"
        ).format(name)
        assert not name.lower().startswith("base"), msg
Example No. 11
    def make_paragraph_for_estimator_type(estimator_type):
        intro = nodes.list_item()
        intro += nodes.strong(
            text="Estimators that allow NaN values for type ")
        intro += nodes.literal(text=f"{estimator_type}")
        intro += nodes.strong(text=":\n")
        exists = False
        lst = nodes.bullet_list()
        for name, est_class in all_estimators(type_filter=estimator_type):
            with suppress(SkipTest):
                est = _construct_instance(est_class)

                if est._get_tags().get("allow_nan"):
                    module_name = ".".join(est_class.__module__.split(".")[:2])
                    class_title = f"{est_class.__name__}"
                    class_url = f"generated/{module_name}.{class_title}.html"
                    item = nodes.list_item()
                    para = nodes.paragraph()
                    para += nodes.reference(class_title,
                                            text=class_title,
                                            internal=False,
                                            refuri=class_url)
                    exists = True
                    item += para
                    lst += item
        intro += lst
        return [intro] if exists else None
Example No. 12
def getClassifiers():
    from sklearn.utils import all_estimators
    import sklearn
    import xgboost as xgb
    estimators = all_estimators()

    classifiers = []
    classifiers.append(sklearn.ensemble._bagging.BaggingClassifier())                   # OK
    classifiers.append(sklearn.tree._classes.DecisionTreeClassifier())                  # OK
    classifiers.append(sklearn.ensemble._forest.ExtraTreesClassifier())                 # OK
    classifiers.append(sklearn.naive_bayes.BernoulliNB())                               # OK
    classifiers.append(sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier())  # OK
    classifiers.append(xgb.XGBClassifier())
    classifiers.append(sklearn.linear_model._logistic.LogisticRegressionCV())           # OK
    classifiers.append(sklearn.tree._classes.ExtraTreeClassifier())                     # OK
    classifiers.append(sklearn.ensemble._forest.RandomForestClassifier())               # OK
    classifiers.append(sklearn.linear_model._logistic.LogisticRegression())             # OK

    # classifiers.append(sklearn.ensemble._gb.GradientBoostingClassifier())             # OK, but takes a long time

    c = []
    for classifier in classifiers:
        c.append(type(classifier).__name__)

    return c
Example No. 13
def reg_dict():
    _all_regressors = {}
    estimators = all_estimators()
    for name, class_ in estimators:
        if issubclass(class_, base.RegressorMixin):
            _all_regressors[name] = class_
    return _all_regressors
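# Usage sketch for reg_dict() above: look up a regressor class by name and fit it.
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=100, n_features=5, random_state=0)
RidgeClass = reg_dict()["Ridge"]
model = RidgeClass().fit(X, y)
print(model.score(X, y))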
Example No. 14
def getClassifiers():
    from sklearn.utils import all_estimators
    import sklearn
    import xgboost as xgb
    estimators = all_estimators()

    classifiers = []
    classifiers.append(sklearn.ensemble._gb.GradientBoostingClassifier())               # OK
    # classifiers.append(sklearn.neighbors._classification.KNeighborsClassifier())      # OK
    classifiers.append(sklearn.linear_model._logistic.LogisticRegressionCV())           # OK
    classifiers.append(sklearn.svm._classes.NuSVC())                                    # OK
    classifiers.append(sklearn.gaussian_process._gpc.GaussianProcessClassifier())       # OK
    classifiers.append(sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis())   # OK
    classifiers.append(sklearn.linear_model._logistic.LogisticRegression())             # OK
    classifiers.append(xgb.XGBClassifier())
    classifiers.append(sklearn.naive_bayes.BernoulliNB())                               # OK
    classifiers.append(sklearn.svm._classes.SVC())                                      # OK, but check the output! It seems to be one-sided...
    classifiers.append(sklearn.tree._classes.DecisionTreeClassifier())                  # OK
    classifiers.append(sklearn.calibration.CalibratedClassifierCV(base_estimator=sklearn.ensemble._weight_boosting.AdaBoostClassifier()))  # OK
    classifiers.append(sklearn.linear_model._stochastic_gradient.SGDClassifier())       # OK
    classifiers.append(sklearn.naive_bayes.GaussianNB())                                # OK
    classifiers.append(sklearn.neural_network._multilayer_perceptron.MLPClassifier())   # OK
    classifiers.append(sklearn.multiclass.OneVsRestClassifier(sklearn.ensemble._weight_boosting.AdaBoostClassifier()))  # OK
    classifiers.append(sklearn.ensemble._forest.RandomForestClassifier())               # OK
    classifiers.append(sklearn.tree._classes.ExtraTreeClassifier())                     # OK
    classifiers.append(sklearn.ensemble._forest.ExtraTreesClassifier())                 # OK
    classifiers.append(sklearn.discriminant_analysis.LinearDiscriminantAnalysis())      # OK
    classifiers.append(sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier())  # OK

    c = []
    for classifier in classifiers:
        c.append(type(classifier).__name__)

    return c
Example No. 15
def _generate_meta_estimator_instances_with_pipeline():
    """Generate instances of meta-estimators fed with a pipeline

    All estimators accepting one of "estimator", "base_estimator" or
    "estimators" are considered meta-estimators.
    """
    for _, Estimator in sorted(all_estimators()):
        sig = set(signature(Estimator).parameters)

        if "estimator" in sig or "base_estimator" in sig or "regressor" in sig:
            if is_regressor(Estimator):
                estimator = make_pipeline(TfidfVectorizer(), Ridge())
                param_grid = {"ridge__alpha": [0.1, 1.0]}
            else:
                estimator = make_pipeline(TfidfVectorizer(),
                                          LogisticRegression())
                param_grid = {"logisticregression__C": [0.1, 1.0]}

            if "param_grid" in sig or "param_distributions" in sig:
                # SearchCV estimators
                extra_params = {"n_iter": 2} if "n_iter" in sig else {}
                yield Estimator(estimator, param_grid, **extra_params)
            else:
                yield Estimator(estimator)

        elif "transformer_list" in sig:
            # FeatureUnion
            transformer_list = [
                ("trans1", make_pipeline(TfidfVectorizer(), MaxAbsScaler())),
                (
                    "trans2",
                    make_pipeline(TfidfVectorizer(),
                                  StandardScaler(with_mean=False)),
                ),
            ]
            yield Estimator(transformer_list)

        elif "estimators" in sig:
            # stacking, voting
            if is_regressor(Estimator):
                estimator = [
                    ("est1", make_pipeline(TfidfVectorizer(),
                                           Ridge(alpha=0.1))),
                    ("est2", make_pipeline(TfidfVectorizer(), Ridge(alpha=1))),
                ]
            else:
                estimator = [
                    (
                        "est1",
                        make_pipeline(TfidfVectorizer(),
                                      LogisticRegression(C=0.1)),
                    ),
                    ("est2",
                     make_pipeline(TfidfVectorizer(),
                                   LogisticRegression(C=1))),
                ]
            yield Estimator(estimator)

        else:
            continue
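# Usage sketch: list the meta-estimator classes that the generator above yields.
for est in _generate_meta_estimator_instances_with_pipeline():
    print(type(est).__name__)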
Example No. 16
def _tested_estimators():
    for name, Estimator in all_estimators():
        try:
            estimator = _construct_instance(Estimator)
        except SkipTest:
            continue

        yield estimator
Example No. 17
def _tested_estimators(type_filter=None):
    for name, Estimator in all_estimators(type_filter=type_filter):
        try:
            estimator = _construct_instance(Estimator)
        except SkipTest:
            continue

        yield estimator
Example No. 18
def test_all_estimators_all_public():
    # all_estimators should not fail when pytest is not installed and should
    # return only public estimators
    with warnings.catch_warnings(record=True) as record:
        estimators = all_estimators()
    # no warnings are raised
    assert not record
    for name, Estimator in estimators:
        assert not name.startswith("_")
Example No. 19
def _tested_estimators():
    for name, Estimator in all_estimators():
        if issubclass(Estimator, BiclusterMixin):
            continue
        try:
            estimator = _construct_instance(Estimator)
        except SkipTest:
            continue

        yield estimator
Example No. 20
    def fit(self, train_x, train_y, folds=3):
        """Trains all classification models from
        parameter grid by running model algorithm search.

        Creates a ranked list of models based on selected
        scoring metric.

        Parameters
        ----------
        train_x : numpy.ndarray
            The features for training classification model
        train_y : numpy.ndarray
            The corresponding label for feature array
        folds : int, optional
            The number of folds for cross validation
        """

        estimators = all_estimators(type_filter="classifier")
        for name, ClassifierClass in estimators:
            if name in model_param_map:
                param_grid = model_param_map[name]
                grid_clf = GridSearchCV(
                    ClassifierClass(),
                    param_grid,
                    cv=folds,
                    scoring="accuracy",
                    verbose=0,
                    n_jobs=-1,
                )
                start = time.time()
                try:
                    grid_clf.fit(train_x, train_y)
                except BaseException as error:
                    self.logger.warning(
                        f"{name} failed with error: {error}.")
                    continue
                end = time.time()
                clf = SimpleClassifier()
                clf.metrics["Training Accuracy"] = grid_clf.best_score_
                pred_y = grid_clf.predict(train_x)
                clf.metrics["Jaccard Score"] = jaccard_score(train_y,
                                                             pred_y,
                                                             average="macro")
                clf.metrics["F1 Score"] = f1_score(train_y,
                                                   pred_y,
                                                   average="macro")
                clf.sk_model = grid_clf.best_estimator_
                clf.name = name
                clf.attributes = grid_clf.best_params_
                clf.train_duration = grid_clf.refit_time_
                clf.gridsearch_duration = end - start
                self.ranked_list.append(clf)
        metrik = lambda clf: clf.metrics[self.metric]
        self.ranked_list.sort(reverse=True, key=metrik)
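# Hypothetical driver for the classifier search above; the ClassifierList name
# and its metric/ranked_list attributes are assumptions for illustration only.
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
search = ClassifierList(metric="F1 Score")  # hypothetical wrapper class
search.fit(X, y, folds=3)
for clf in search.ranked_list[:3]:
    print(clf.name, clf.metrics["F1 Score"])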
Example No. 21
    def fit(self, train_x, train_y, folds=3):
        """Trains the optimal regression model on the given dataset
        by running a model algorithm search.

        If the argument folds isn't passed, the default value (3) is used.

        Parameters
        ----------
        train_x : numpy.ndarray
            The features for training regression model
        train_y : numpy.ndarray
            The corresponding label for feature array
        folds : int, optional
            The number of folds for cross validation
        """

        estimators = all_estimators(type_filter="regressor")
        for name, RegressionClass in estimators:
            if name in model_param_map:
                param_grid = model_param_map[name]
                grid_rgr = GridSearchCV(
                    RegressionClass(),
                    param_grid,
                    cv=folds,
                    scoring="neg_root_mean_squared_error",
                    verbose=0,
                    n_jobs=-1,
                    error_score="raise",
                )
                start = time.time()
                try:
                    grid_rgr.fit(train_x, train_y)
                except BaseException as error:
                    self.failed_models.append(name)
                    self.logger.warning(
                        f"{name} failed with error: {error}.")
                    continue
                end = time.time()
                if self.metrics.get(
                        "Training Score"
                ) is None or -grid_rgr.best_score_ < self.metrics.get(
                        "Training Score"):
                    self.metrics["Training Score"] = -grid_rgr.best_score_
                    pred_y = grid_rgr.predict(train_x)
                    self.metrics["Mean Absolute Error"] = mean_absolute_error(
                        train_y, pred_y)
                    self.metrics["Mean Square Error"] = mean_squared_error(
                        train_y, pred_y)
                    self.metrics["R-Squared"] = r2_score(train_y, pred_y)
                    self.sk_model = grid_rgr.best_estimator_
                    self.name = name
                    self.attributes = grid_rgr.best_params_
                    self.train_duration = grid_rgr.refit_time_
                    self.gridsearch_duration = end - start
Example No. 22
    def fit(self, train_x, train_y, folds=3):
        """Trains the optimal classification model
        on given dataset by running model algorithm search.

        If the argument folds isn't passed, the default
        value(3) is used.

        Parameters
        ----------
        train_x : numpy.ndarray
            The features for training classification model
        train_y : numpy.ndarray
            The corresponding label for feature array
        folds : int, optional
            The number of folds for cross validation
        """

        estimators = all_estimators(type_filter="classifier")
        for name, ClassifierClass in estimators:
            if name in model_param_map:
                param_grid = model_param_map[name]
                grid_clf = GridSearchCV(
                    ClassifierClass(),
                    param_grid,
                    cv=folds,
                    scoring="accuracy",
                    verbose=0,
                    n_jobs=-1,
                )
                start = time.time()
                try:
                    grid_clf.fit(train_x, train_y)
                except BaseException as error:
                    self.failed_models.append(name)
                    self.logger.warning(
                        f"{name} failed with error: {error}.")
                    continue
                end = time.time()
                if grid_clf.best_score_ > self.metrics.get(
                        "Training Accuracy", 0.0):
                    self.metrics["Training Accuracy"] = grid_clf.best_score_
                    pred_y = grid_clf.predict(train_x)
                    self.metrics["Jaccard Score"] = jaccard_score(
                        train_y, pred_y, average="macro")
                    self.metrics["F1 Score"] = f1_score(train_y,
                                                        pred_y,
                                                        average="macro")
                    self.sk_model = grid_clf.best_estimator_
                    self.name = name
                    self.attributes = grid_clf.best_params_
                    self.train_duration = grid_clf.refit_time_
                    self.gridsearch_duration = end - start
Example No. 23
    def valid_components(self):
        """Find all supported regressors.

        Returns:
            valid_components: numpy.array([[regressor name, object], ...])
                Valid regressors
        """
        if not hasattr(self, "valid_components_r"):
            regressors = np.array([est for est in all_estimators() if
                                   issubclass(est[1], RegressorMixin)])

            self.valid_components_r = regressors
        return self.valid_components_r
Example No. 24
def _tested_linear_classifiers():
    classifiers = all_estimators(type_filter="classifier")

    with warnings.catch_warnings(record=True):
        for name, clazz in classifiers:
            required_parameters = getattr(clazz, "_required_parameters", [])
            if len(required_parameters):
                # FIXME
                continue

            if "class_weight" in clazz().get_params().keys() and issubclass(
                    clazz, LinearClassifierMixin):
                yield name, clazz
Example No. 25
    def valid_components(self):
        """Find all supported classifiers.

        Returns:
            valid_components: numpy.array([[classifier name, object], ...])
                Valid classifiers
        """
        if not hasattr(self, "valid_components_c"):
            classifiers = np.array([
                est for est in all_estimators()
                if issubclass(est[1], ClassifierMixin)
            ])

            self.valid_components_c = classifiers
        return self.valid_components_c
Example No. 26
def get_all_methods():
    estimators = all_estimators()
    for name, Estimator in estimators:
        if name.startswith("_"):
            # skip private classes
            continue
        methods = []
        for name in dir(Estimator):
            if name.startswith("_"):
                continue
            method_obj = getattr(Estimator, name)
            if hasattr(method_obj, "__call__") or isinstance(method_obj, property):
                methods.append(name)
        methods.append(None)

        for method in sorted(methods, key=lambda x: str(x)):
            yield Estimator, method
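# Sketch of how the generator above can feed a parametrized check; the test
# body below is illustrative only.
import pytest

@pytest.mark.parametrize("Estimator, method", get_all_methods())
def test_method_has_docstring(Estimator, method):
    if method is None:
        pytest.skip("class-level checks not shown in this sketch")
    assert getattr(Estimator, method).__doc__ is not None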
Example No. 27
    def generate_curriculum(self, X, y, path, method):

        X = np.array(X)
        y = np.array(y)

        if method == 'kdn':
            score = self.kdn_score(X, y, 50)
            curriculum_df = pd.DataFrame(score, columns=['score'])
            curriculum_df.reset_index(inplace=True)
            curriculum_df.to_csv(path, index=False)
        elif method == 'faiss_kdn':
            score = FaissKNNClassifier().faiss_kdn_score(X, y, 50)
            curriculum_df = pd.DataFrame(score, columns=['score'])
            curriculum_df.reset_index(inplace=True)
            curriculum_df.to_csv(path, index=False)
        elif method == 'gmm':
            curriculum_df = self.GMM_IH(X, y)
            curriculum_df.to_csv(path, index=False)

        elif method == 'ensemble':
            estimators = all_estimators(type_filter='classifier')

            clf_l = ["RandomForestClassifier", "MLPClassifier", "SVC"]

            classifiers = []
            for name, class_ in estimators:
                if hasattr(class_, 'predict_proba') and name in clf_l:
                    if name == "SVC":
                        clf = class_(probability=True)
                    else:
                        clf = class_()
                    classifiers.append(clf)

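            # all_estimators() is sorted by name, so `classifiers` ends up as
            # [MLPClassifier, RandomForestClassifier, SVC], matching the labels below.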
            estimator = VotingClassifier(estimators=[('mlp', classifiers[0]),
                                                     ('rf', classifiers[1]),
                                                     ('svm', classifiers[2])],
                                         voting='soft')
            curriculum_df = self.ensemble_hardness(X, y, estimator)
            curriculum_df.to_csv(path, index=False)
        else:
            print("Aborting generation")

        return
Example No. 28
    elif Estimator.__name__ == "Pipeline":
        return Estimator(steps=[("clf", LogisticRegression())])
    elif Estimator.__name__ == "FeatureUnion":
        return Estimator(transformer_list=[("transformer", FunctionTransformer())])


def _construct_sparse_coder(Estimator):
    # XXX: hard-coded assumption that n_features=3
    dictionary = np.array(
        [[0, 1, 0], [-1, -1, 2], [1, 1, 1], [0, 1, 1], [0, 2, 1]],
        dtype=np.float64,
    )
    return Estimator(dictionary=dictionary)


@pytest.mark.parametrize("name, Estimator", all_estimators())
def test_fit_docstring_attributes(name, Estimator):
    pytest.importorskip("numpydoc")
    from numpydoc import docscrape

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc["Attributes"]

    if Estimator.__name__ in (
        "HalvingRandomSearchCV",
        "RandomizedSearchCV",
        "HalvingGridSearchCV",
        "GridSearchCV",
    ):
        est = _construct_searchcv_instance(Estimator)
    elif Estimator.__name__ in (
Example No. 29
dataset = load_wine()
x = dataset.data
y = dataset.target

x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=44)

import sklearn
print(sklearn.__version__)  # 0.23.2

# all_estimators -> optimized for 0.20.

allAlgorithms = all_estimators(type_filter='classifier')  # all of sklearn's classifier models
# print(allAlgorithms)

for (name, algorithm) in allAlgorithms:
    try:
        model = algorithm()

        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        print(name, 'accuracy :', accuracy_score(y_test, y_pred))
    except Exception:
        # continue
        print(name, 'is not available')  # algorithm not in 0.23.2

# Can be used as a baseline metric.
'''
Example No. 30
def test_all_estimators_all_public():
    # all_estimators should not fail when pytest is not installed and should
    # return only public estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        assert not name.startswith("_")