示例#1
0
    def __init__(self, steps, hyperparams=None):
        """
        Build the union over ``steps`` and, when no hyperparams are supplied,
        install a default choice together with its search space.

        :param steps: forwarded unchanged to ``FeatureUnion.__init__``
        :param hyperparams: when left to ``None``, the choice hyperparameter
            and its space are initialised here; for any other value that
            initialisation is skipped (the value itself is not read here)
        """
        FeatureUnion.__init__(self, steps, joiner=SelectNonEmptyDataInputs())
        self._make_all_steps_optional()

        if hyperparams is not None:
            return

        # Every key except the last one is selectable; the last key is
        # presumably the joiner registered by FeatureUnion -- TODO confirm.
        selectable = list(self.keys())[:-1]

        # Default to the first selectable key, and let the space range over
        # all of them.
        self.set_hyperparams({CHOICE_HYPERPARAM: selectable[0]})
        self.set_hyperparams_space({CHOICE_HYPERPARAM: Choice(selectable)})
示例#2
0
    def __init__(self, steps, hyperparams=None):
        """
        Build the union over ``steps``, then register the choice
        hyperparameter and the space it can be drawn from.

        :param steps: forwarded unchanged to ``FeatureUnion.__init__``
        :param hyperparams: value stored under the choice hyperparameter;
            when ``None``, the first selectable key is stored instead
        """
        FeatureUnion.__init__(self, steps, joiner=SelectNonEmptyDataInputs())
        self._make_all_steps_optional()

        # Every key except the last one is selectable; the last key is
        # presumably the joiner registered by FeatureUnion -- TODO confirm.
        selectable = list(self.keys())[:-1]

        selection = {
            ChooseOneStepOf.CHOICE_HYPERPARAM:
                selectable[0] if hyperparams is None else hyperparams
        }
        self.update_hyperparams(selection)

        # The space is registered whether or not a value was supplied.
        self.update_hyperparams_space(
            {ChooseOneStepOf.CHOICE_HYPERPARAM: Choice(selectable)})
示例#3
0
def main():
    """
    Run a random-search AutoML loop over a TensorFlow 2 classifier on the
    white wine quality CSV.

    Reads ``data/winequality-white.csv`` (``;``-separated), treats every
    column but the last as a feature and the last column as the label,
    builds the preprocessing + model pipeline, and fits an ``AutoML`` loop
    (200 trials, 75 epochs each, 30% validation split, early stopping after
    3 epochs without improvement). Nothing is returned.
    """
    def accuracy(data_inputs, expected_outputs):
        """Return the fraction of rows whose argmax along axis 1 agrees."""
        predicted = np.argmax(np.array(data_inputs), axis=1)
        expected = np.argmax(np.array(expected_outputs), axis=1)
        return np.mean(predicted == expected)

    # load the dataset
    df = read_csv('data/winequality-white.csv', sep=';')
    data_inputs = df.values
    # Shift the label column (the last one) down by one.
    data_inputs[:, -1] = data_inputs[:, -1] - 1
    n_features = data_inputs.shape[1] - 1
    n_classes = 10

    # Derive the column layout from the data instead of hard-coding indices:
    # the first n_features columns are inputs, the column after them is the
    # label.
    feature_columns = list(range(n_features))
    label_column = n_features

    p = Pipeline([
        TrainOnlyWrapper(DataShuffler()),
        ColumnTransformerInputOutput(
            input_columns=[(feature_columns, ToNumpy(np.float32))],
            output_columns=[(label_column, Identity())]
        ),
        OutputTransformerWrapper(PlotDistribution(column=-1)),
        MiniBatchSequentialPipeline([
            Tensorflow2ModelStep(
                create_model=create_model,
                create_loss=create_loss,
                create_optimizer=create_optimizer
            ).set_hyperparams(HyperparameterSamples({
                'n_dense_layers': 2,
                'input_dim': n_features,
                'optimizer': 'adam',
                'activation': 'relu',
                'kernel_initializer': 'he_uniform',
                'learning_rate': 0.01,
                'hidden_dim': 20,
                # Keep the default in sync with the search space and the
                # one-hot encoder below, which both use n_classes.
                'n_classes': n_classes
            })).set_hyperparams_space(HyperparameterSpace({
                'n_dense_layers': RandInt(2, 4),
                'hidden_dim_layer_multiplier': Uniform(0.30, 1),
                'input_dim': FixedHyperparameter(n_features),
                'optimizer': Choice([
                    OPTIMIZERS.ADAM.value,
                    OPTIMIZERS.SGD.value,
                    OPTIMIZERS.ADAGRAD.value
                ]),
                'activation': Choice([
                    ACTIVATIONS.RELU.value,
                    ACTIVATIONS.TANH.value,
                    ACTIVATIONS.SIGMOID.value,
                    ACTIVATIONS.ELU.value,
                ]),
                'kernel_initializer': Choice([
                    KERNEL_INITIALIZERS.GLOROT_NORMAL.value,
                    KERNEL_INITIALIZERS.GLOROT_UNIFORM.value,
                    KERNEL_INITIALIZERS.HE_UNIFORM.value
                ]),
                'learning_rate': LogUniform(0.005, 0.01),
                'hidden_dim': RandInt(3, 80),
                'n_classes': FixedHyperparameter(n_classes)
            }))
        ], batch_size=33),
        # One-hot encode the expected outputs so that `accuracy` can take an
        # argmax over them.
        OutputTransformerWrapper(Pipeline([
            ExpandDim(),
            OneHotEncoder(nb_columns=n_classes, name='classes')
        ]))
    ])

    auto_ml = AutoML(
        pipeline=p,
        hyperparams_repository=InMemoryHyperparamsRepository(
            cache_folder='trials'),
        hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
        validation_splitter=ValidationSplitter(test_size=0.30),
        scoring_callback=ScoringCallback(accuracy,
                                         higher_score_is_better=True),
        callbacks=[
            MetricCallback(
                name='classification_report_imbalanced_metric',
                metric_function=classificaiton_report_imbalanced_metric,
                higher_score_is_better=True),
            MetricCallback(name='f1',
                           metric_function=f1_score_weighted,
                           higher_score_is_better=True),
            MetricCallback(name='recall',
                           metric_function=recall_score_weighted,
                           higher_score_is_better=True),
            MetricCallback(name='precision',
                           metric_function=precision_score_weighted,
                           higher_score_is_better=True),
            EarlyStoppingCallback(max_epochs_without_improvement=3)
        ],
        n_trials=200,
        refit_trial=True,
        epochs=75)

    # The label column travels inside data_inputs; the pipeline splits it
    # out itself, so no expected_outputs are passed here.
    auto_ml = auto_ml.fit(data_inputs=data_inputs)
示例#4
0
from neuraxle.metaopt.tpe import TreeParzenEstimatorHyperparameterSelectionStrategy
from neuraxle.pipeline import Pipeline
from neuraxle.steps.misc import FitTransformCallbackStep
from neuraxle.steps.numpy import AddN
import os


@pytest.mark.parametrize(
    "expected_output_mult, pipeline",
    [(3.5,
      Pipeline([
          FitTransformCallbackStep().set_name('callback'),
          AddN(0.).set_hyperparams_space(
              HyperparameterSpace({
                  'add':
                  Choice(choice_list=[0, 1.5, 2, 3.5, 4, 5, 6]),
              })),
          AddN(0.).set_hyperparams_space(
              HyperparameterSpace({
                  'add':
                  Choice(choice_list=[0, 1.5, 2, 3.5, 4, 5, 6]),
              }))
      ])),
     (3.5,
      Pipeline([
          FitTransformCallbackStep().set_name('callback'),
          AddN(0.).set_hyperparams_space(
              HyperparameterSpace({
                  'add': Quantized(hd=Uniform(0, 10)),
              })),
          AddN(0.).set_hyperparams_space(
示例#5
0
def main():
    """
    Let an AutoML loop pick one of five scikit-learn classifiers, then score
    the best model found on the test split.

    Builds the candidate classifiers with their hyperparameter spaces, wraps
    them in a ``ChooseOneStepOf``, runs a 7-trial random search, prints the
    test accuracy of the best model and finally removes the ``cache``
    directory used by the JSON hyperparameter repository.
    """
    # Define classification models, and hyperparams.
    # See also HyperparameterSpace documentation : https://www.neuraxle.org/stable/api/neuraxle.hyperparams.space.html#neuraxle.hyperparams.space.HyperparameterSpace

    def tree_space():
        # Both tree classifiers search the same space; each call returns a
        # fresh instance so the two wrappers never share one object.
        return HyperparameterSpace({
            'criterion': Choice(['gini', 'entropy']),
            'splitter': Choice(['best', 'random']),
            'min_samples_leaf': RandInt(2, 5),
            'min_samples_split': RandInt(2, 4)
        })

    decision_tree = SKLearnWrapper(DecisionTreeClassifier(), tree_space())
    extra_tree = SKLearnWrapper(ExtraTreeClassifier(), tree_space())

    # The three candidates below first ravel the expected outputs.
    ridge = Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(RidgeClassifier(), HyperparameterSpace({
            'alpha': Choice([0.0, 1.0, 10.0, 100.0]),
            'fit_intercept': Boolean(),
            'normalize': Boolean()
        })),
    ])
    ridge = ridge.set_name('RidgeClassifier')

    logistic = Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(LogisticRegression(), HyperparameterSpace({
            'C': LogUniform(0.01, 10.0),
            'fit_intercept': Boolean(),
            'penalty': Choice(['none', 'l2']),
            'max_iter': RandInt(20, 200)
        })),
    ])
    logistic = logistic.set_name('LogisticRegression')

    random_forest = Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(RandomForestClassifier(), HyperparameterSpace({
            'n_estimators': RandInt(50, 600),
            'criterion': Choice(['gini', 'entropy']),
            'min_samples_leaf': RandInt(2, 5),
            'min_samples_split': RandInt(2, 4),
            'bootstrap': Boolean()
        })),
    ])
    random_forest = random_forest.set_name('RandomForestClassifier')

    # Define a classification pipeline that lets the AutoML loop choose one of the classifier.
    # See also ChooseOneStepOf documentation : https://www.neuraxle.org/stable/api/neuraxle.steps.flow.html#neuraxle.steps.flow.ChooseOneStepOf

    candidates = [decision_tree, extra_tree, ridge, logistic, random_forest]
    pipeline = Pipeline([ChooseOneStepOf(candidates)])

    # Create the AutoML loop object.
    # See also AutoML documentation : https://www.neuraxle.org/stable/api/neuraxle.metaopt.auto_ml.html#neuraxle.metaopt.auto_ml.AutoML

    optimizer = RandomSearchHyperparameterSelectionStrategy()
    splitter = ValidationSplitter(test_size=0.20)
    scorer = ScoringCallback(accuracy_score, higher_score_is_better=True)
    repository = HyperparamsJSONRepository(cache_folder='cache')

    automl_loop = AutoML(
        pipeline=pipeline,
        hyperparams_optimizer=optimizer,
        validation_splitter=splitter,
        scoring_callback=scorer,
        n_trials=7,
        epochs=1,
        hyperparams_repository=repository,
        refit_trial=True,
        continue_loop_on_error=False)

    # Load data, and launch AutoML loop !

    X_train, y_train, X_test, y_test = generate_classification_data()
    automl_loop = automl_loop.fit(X_train, y_train)

    # Get the model from the best trial, and make predictions using predict.
    # See also predict documentation : https://www.neuraxle.org/stable/api/neuraxle.base.html#neuraxle.base.BaseStep.predict

    best_model = automl_loop.get_best_model()
    predictions = best_model.predict(X_test)

    test_accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
    print("Test accuracy score:", test_accuracy)

    # Drop the trials written by the JSON repository.
    shutil.rmtree('cache')
示例#6
0
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.metaopt.auto_ml import InMemoryHyperparamsRepository, AutoML, ValidationSplitter, \
    RandomSearchHyperparameterSelectionStrategy, BaseHyperparameterSelectionStrategy
from neuraxle.metaopt.callbacks import MetricCallback, ScoringCallback
from neuraxle.metaopt.tpe import TreeParzenEstimatorHyperparameterSelectionStrategy
from neuraxle.pipeline import Pipeline
from neuraxle.steps.misc import FitTransformCallbackStep
from neuraxle.steps.numpy import AddN
import os


@pytest.mark.parametrize("expected_output_mult, pipeline", [
    (3.5, Pipeline([
        FitTransformCallbackStep().set_name('callback'),
        AddN(0.).set_hyperparams_space(HyperparameterSpace({
            'add': Choice(choice_list=[0, 1.5, 2, 3.5, 4, 5, 6]),
        })),
        AddN(0.).set_hyperparams_space(HyperparameterSpace({
            'add': Choice(choice_list=[0, 1.5, 2, 3.5, 4, 5, 6]),
        }))
    ])),
    (3.5, Pipeline([
        FitTransformCallbackStep().set_name('callback'),
        AddN(0.).set_hyperparams_space(HyperparameterSpace({
            'add': Quantized(hd=Uniform(0, 10)),
        })),
        AddN(0.).set_hyperparams_space(HyperparameterSpace({
            'add': Quantized(hd=Uniform(0, 10)),
        }))
    ])),
    (3.5, Pipeline([