def test_predict_should_predict_in_test_mode():
    """predict() must execute the pipeline in test mode: the test-only branch
    (x2) runs and the train-only branch (x4) is skipped."""
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()

    test_only_step = TestOnlyWrapper(
        CallbackWrapper(MultiplyByN(2), transform_tape, fit_tape))
    train_only_step = TrainOnlyWrapper(
        CallbackWrapper(MultiplyByN(4), transform_tape, fit_tape))
    pipeline = Pipeline([test_only_step, train_only_step])

    predictions = pipeline.predict(np.array([1, 1]))

    # Only the x2 multiplier should have been applied.
    assert np.array_equal(predictions, np.array([2, 2]))
def test_predict_should_transform_with_initial_is_train_mode_after_predict():
    """After predict() completes, the pipeline must be restored to train mode,
    so a subsequent transform() runs the train-only branch (x4) instead of the
    test-only one (x2)."""
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()

    pipeline = Pipeline([
        TestOnlyWrapper(
            CallbackWrapper(MultiplyByN(2), transform_tape, fit_tape)),
        TrainOnlyWrapper(
            CallbackWrapper(MultiplyByN(4), transform_tape, fit_tape)),
    ])

    # predict() temporarily switches to test mode...
    pipeline.predict(np.array([1, 1]))
    # ...but transform() afterwards must see the initial train mode again.
    transformed = pipeline.transform(np.array([1, 1]))

    assert np.array_equal(transformed, np.array([4, 4]))
def test_handle_predict_should_predict_in_test_mode():
    """handle_predict() must run in test mode, like predict(): only the
    test-only (x2) step applies to the data container's inputs."""
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()

    pipeline = Pipeline([
        TestOnlyWrapper(
            CallbackWrapper(MultiplyByN(2), transform_tape, fit_tape)),
        TrainOnlyWrapper(
            CallbackWrapper(MultiplyByN(4), transform_tape, fit_tape)),
    ])

    container = DataContainer(
        data_inputs=np.array([1, 1]),
        expected_outputs=np.array([1, 1]))
    result = pipeline.handle_predict(
        data_container=container, context=ExecutionContext())

    assert np.array_equal(result.data_inputs, np.array([2, 2]))
def main():
    """Run an AutoML random search over a TF2 classifier on the
    winequality-white dataset (semicolon-separated CSV with the label in the
    last column)."""

    def accuracy(data_inputs, expected_outputs):
        # Argmax-accuracy over one-hot-encoded predictions and labels.
        return np.mean(
            np.argmax(np.array(data_inputs), axis=1) == np.argmax(
                np.array(expected_outputs), axis=1))

    # load the dataset
    df = read_csv('data/winequality-white.csv', sep=';')
    data_inputs = df.values
    # Shift labels down by one so classes start at 0.
    data_inputs[:, -1] = data_inputs[:, -1] - 1
    n_features = data_inputs.shape[1] - 1  # last column is the label
    n_classes = 10
    p = Pipeline([
        # Shuffle only while fitting; inference order is left untouched.
        TrainOnlyWrapper(DataShuffler()),
        # Columns 0-10 become float32 features; column 11 becomes the output.
        ColumnTransformerInputOutput(
            input_columns=[(
                [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                ToNumpy(np.float32)
            )],
            output_columns=[(11, Identity())]
        ),
        # Plot the label distribution (side effect only).
        OutputTransformerWrapper(PlotDistribution(column=-1)),
        MiniBatchSequentialPipeline([
            Tensorflow2ModelStep(
                create_model=create_model,
                create_loss=create_loss,
                create_optimizer=create_optimizer
            ) \
                .set_hyperparams(HyperparameterSamples({
                    'n_dense_layers': 2,
                    'input_dim': n_features,
                    'optimizer': 'adam',
                    'activation': 'relu',
                    'kernel_initializer': 'he_uniform',
                    'learning_rate': 0.01,
                    # NOTE(review): default sample says 3 classes while the
                    # space below fixes n_classes to 10 — confirm intended.
                    'n_classes': 3,
                    'hidden_dim': 20
                })).set_hyperparams_space(HyperparameterSpace({
                    'n_dense_layers': RandInt(2, 4),
                    'hidden_dim_layer_multiplier': Uniform(0.30, 1),
                    'input_dim': FixedHyperparameter(n_features),
                    'optimizer': Choice([
                        OPTIMIZERS.ADAM.value,
                        OPTIMIZERS.SGD.value,
                        OPTIMIZERS.ADAGRAD.value
                    ]),
                    'activation': Choice([
                        ACTIVATIONS.RELU.value,
                        ACTIVATIONS.TANH.value,
                        ACTIVATIONS.SIGMOID.value,
                        ACTIVATIONS.ELU.value,
                    ]),
                    'kernel_initializer': Choice([
                        KERNEL_INITIALIZERS.GLOROT_NORMAL.value,
                        KERNEL_INITIALIZERS.GLOROT_UNIFORM.value,
                        KERNEL_INITIALIZERS.HE_UNIFORM.value
                    ]),
                    'learning_rate': LogUniform(0.005, 0.01),
                    'hidden_dim': RandInt(3, 80),
                    'n_classes': FixedHyperparameter(n_classes)
                }))
        ], batch_size=33),
        # One-hot encode the expected outputs so they match the model's
        # softmax-shaped predictions for the accuracy metric.
        OutputTransformerWrapper(Pipeline([
            ExpandDim(),
            OneHotEncoder(nb_columns=n_classes, name='classes')
        ]))
    ])
    auto_ml = AutoML(
        pipeline=p,
        hyperparams_repository=InMemoryHyperparamsRepository(
            cache_folder='trials'),
        hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
        validation_splitter=ValidationSplitter(test_size=0.30),
        scoring_callback=ScoringCallback(accuracy, higher_score_is_better=True),
        callbacks=[
            MetricCallback(
                name='classification_report_imbalanced_metric',
                # NOTE(review): "classificaiton" is misspelled, but that is the
                # function's actual name where it is defined — do not "fix"
                # here without renaming the definition too.
                metric_function=classificaiton_report_imbalanced_metric,
                higher_score_is_better=True),
            MetricCallback(name='f1',
                           metric_function=f1_score_weighted,
                           higher_score_is_better=True),
            MetricCallback(name='recall',
                           metric_function=recall_score_weighted,
                           higher_score_is_better=True),
            MetricCallback(name='precision',
                           metric_function=precision_score_weighted,
                           higher_score_is_better=True),
            # Stop a trial after 3 epochs without improvement.
            EarlyStoppingCallback(max_epochs_without_improvement=3)
        ],
        n_trials=200,
        refit_trial=True,
        epochs=75)
    auto_ml = auto_ml.fit(data_inputs=data_inputs)
def __init__(self, wrapped, seed=None):
    """Build a two-step pipeline: shuffle the data (train mode only), then
    run the wrapped step.

    :param wrapped: the step to execute after the train-time shuffle.
    :param seed: optional RNG seed forwarded to :class:`DataShuffler`.
    """
    steps = [
        TrainOnlyWrapper(DataShuffler(seed=seed)),
        wrapped,
    ]
    Pipeline.__init__(self, steps)
def test_step_repr_representation_works_correctly():
    # repr() of a wrapped step should pretty-print the nested step structure.
    output = repr(TrainOnlyWrapper(SomeTruncableStep()))
    # NOTE(review): the expected multi-line repr string is truncated in this
    # view — its closing triple-quote lies outside this chunk.
    assert output == """TrainOnlyWrapper(SomeTruncableStep([
def main(chosen_device):
    """Train and evaluate a seq2seq signal-prediction pipeline on generated
    data, then plot metrics and sample predictions.

    :param chosen_device: NOTE(review) — not referenced anywhere in this view;
        confirm whether it should configure TensorFlow device placement.
    """
    exercice_number = 1  # "exercice" spelling matches the dataset helper's API
    print('exercice {}\n=================='.format(exercice_number))

    data_inputs, expected_outputs = generate_data(
        # See: https://github.com/guillaume-chevalier/seq2seq-signal-prediction/blob/master/datasets.py
        exercice_number=exercice_number,
        n_samples=None,
        window_size_past=None,
        window_size_future=None)

    print('data_inputs shape: {} => (n_samples, window_size_past, input_dim)'.
          format(data_inputs.shape))
    print(
        'expected_outputs shape: {} => (n_samples, window_size_future, output_dim)'
        .format(expected_outputs.shape))

    # 3D data: (n_samples, time, features) — shapes printed above.
    sequence_length = data_inputs.shape[1]
    input_dim = data_inputs.shape[2]
    output_dim = expected_outputs.shape[2]

    # Training configuration.
    batch_size = 100
    epochs = 3
    validation_size = 0.15
    max_plotted_validation_predictions = 10

    seq2seq_pipeline_hyperparams = HyperparameterSamples({
        'hidden_dim': 100,
        'layers_stacked_count': 2,
        'lambda_loss_amount': 0.0003,
        'learning_rate': 0.006,
        'window_size_future': sequence_length,
        'output_dim': output_dim,
        'input_dim': input_dim
    })
    # Adapts a 2D metric to the 3D (sample, time, feature) outputs.
    feature_0_metric = metric_3d_to_2d_wrapper(mean_squared_error)
    metrics = {'mse': feature_0_metric}

    # Inner model pipeline: per-sample normalization, then the TF2 model,
    # wrapped so its predictions can be plotted.
    signal_prediction_pipeline = Pipeline([
        ForEachDataInput(MeanStdNormalizer()),
        ToNumpy(),
        PlotPredictionsWrapper(
            Tensorflow2ModelStep(
                # See: https://github.com/Neuraxio/Neuraxle-TensorFlow
                create_model=create_model,
                create_loss=create_loss,
                create_optimizer=create_optimizer,
                expected_outputs_dtype=tf.dtypes.float32,
                data_inputs_dtype=tf.dtypes.float32,
                print_loss=True).set_hyperparams(seq2seq_pipeline_hyperparams))
    ]).set_name('SignalPrediction')

    # Outer pipeline, inside-out: shuffle (train only) -> mini-batches with
    # batch metrics -> epoch metrics -> train/validation split -> repeat for
    # `epochs` epochs.
    pipeline = Pipeline([
        EpochRepeater(ValidationSplitWrapper(
            MetricsWrapper(Pipeline([
                TrainOnlyWrapper(DataShuffler()),
                MiniBatchSequentialPipeline([
                    MetricsWrapper(signal_prediction_pipeline,
                                   metrics=metrics,
                                   name='batch_metrics')
                ], batch_size=batch_size)
            ]), metrics=metrics, name='epoch_metrics', print_metrics=True),
            test_size=validation_size,
            scoring_function=feature_0_metric), epochs=epochs)
    ])

    pipeline, outputs = pipeline.fit_transform(data_inputs, expected_outputs)

    plot_metrics(pipeline=pipeline, exercice_number=exercice_number)
    plot_predictions(data_inputs, expected_outputs, pipeline,
                     max_plotted_validation_predictions)