Example #1
    def test_transform_multi_class(self):
        # Set output_dim to 2 to mock a multi-class model.
        model = create_xor_model(output_dim=2)

        with spark_session('test_transform_multi_class') as spark:
            df = create_xor_data(spark)
            metadata = util._get_metadata(df)

            torch_model = hvd_spark.TorchModel(history=None,
                                               model=model,
                                               input_shapes=[[2]],
                                               feature_columns=['features'],
                                               label_columns=['y'],
                                               _metadata=metadata)
            out_df = torch_model.transform(df)

            # In a multi-class model the output is a vector, while the label is a scalar.
            expected_types = {
                'x1': IntegerType,
                'x2': IntegerType,
                'features': VectorUDT,
                'weight': FloatType,
                'y': FloatType,
                'y__output': VectorUDT
            }

            for field in out_df.schema.fields:
                assert type(field.dataType) == expected_types[field.name]
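The helpers create_xor_data and create_xor_model used throughout these examples are defined elsewhere in Horovod's Spark test utilities and are not shown on this page. A minimal sketch of what the PyTorch variants are assumed to look like follows; the column layout, default dtypes, and the float cast in forward are inferred from how the tests above use them, and the real helpers may differ.

# Hypothetical sketches of the shared test fixtures (inferred, not Horovod's actual helpers).
import torch
import torch.nn as nn
from pyspark.ml.linalg import DenseVector


def create_xor_data(spark):
    # Four XOR rows: raw inputs x1/x2, a Spark ML 'features' vector,
    # a sample weight, and the label 'y'.
    rows = [(0, 0, DenseVector([0.0, 0.0]), 1.0, 0.0),
            (0, 1, DenseVector([0.0, 1.0]), 1.0, 1.0),
            (1, 0, DenseVector([1.0, 0.0]), 1.0, 1.0),
            (1, 1, DenseVector([1.0, 1.0]), 1.0, 0.0)]
    return spark.createDataFrame(rows, ['x1', 'x2', 'features', 'weight', 'y'])


class _XorNet(nn.Module):
    def __init__(self, output_dim=1):
        super().__init__()
        self.hidden = nn.Linear(2, 8)
        self.output = nn.Linear(8, output_dim)

    def forward(self, features):
        # Cast to float so the tests can also feed int32 tensors.
        x = torch.tanh(self.hidden(features.float()))
        return torch.sigmoid(self.output(x))


def create_xor_model(output_dim=1):
    return _XorNet(output_dim)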
Example #2
    def test_fit_model(self):
        model = create_xor_model()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        loss = F.binary_cross_entropy

        with spark_session('test_fit_model') as spark:
            df = create_xor_data(spark)

            with local_store() as store:
                torch_estimator = hvd_spark.TorchEstimator(
                    num_proc=2,
                    store=store,
                    model=model,
                    optimizer=optimizer,
                    loss=loss,
                    input_shapes=[[2]],
                    feature_cols=['features'],
                    label_cols=['y'],
                    batch_size=1,
                    epochs=3,
                    random_seed=1,
                    verbose=2,
                    sample_weight_col='weight')

                torch_model = torch_estimator.fit(df)

                trained_model = torch_model.getModel()
                pred = trained_model(torch.ones([1, 2], dtype=torch.int32))
                assert len(pred) == 1
                assert pred.dtype == torch.float32
Example #3
    def test_torch_direct_parquet_train(self):
        with spark_session('test_torch_direct_parquet_train') as spark:
            df = create_xor_data(spark)

            backend = CallbackBackend()
            with local_store() as store:
                store.get_train_data_path = lambda v=None: store._train_path
                store.get_val_data_path = lambda v=None: store._val_path

                with util.prepare_data(backend.num_processes(),
                                       store,
                                       df,
                                       feature_columns=['features'],
                                       label_columns=['y']):
                    model = create_xor_model()
                    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
                    loss = nn.BCELoss()

                    est = hvd_spark.TorchEstimator(backend=backend,
                                                   store=store,
                                                   model=model,
                                                   optimizer=optimizer,
                                                   input_shapes=[[2]],
                                                   feature_cols=['features'],
                                                   label_cols=['y'],
                                                   batch_size=1,
                                                   epochs=3,
                                                   verbose=2)

                    # Make sure that setLoss works with a non-list loss.
                    est.setLoss(loss)

                    transformer = est.fit_on_parquet()
                    predictions = transformer.transform(df)
                    assert predictions.count() == df.count()
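CallbackBackend, used here and in several of the following examples, is a test-only backend that runs the training function in the current process instead of launching Spark tasks. A rough sketch of what such a helper is assumed to look like:

# Hypothetical stand-in for the CallbackBackend test helper (not the actual implementation).
class CallbackBackend(object):
    def run(self, fn, args=(), kwargs={}, env=None):
        # Invoke the training function locally, once per "process".
        return [fn(*args, **kwargs)] * self.num_processes()

    def num_processes(self):
        return 1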
Example #4
    def test_fit_model(self):
        model = create_xor_model()
        optimizer = tf.keras.optimizers.SGD(lr=0.1)
        loss = 'binary_crossentropy'

        with spark_session('test_fit_model') as spark:
            df = create_xor_data(spark)

            with local_store() as store:
                keras_estimator = hvd.KerasEstimator(
                    num_proc=2,
                    store=store,
                    model=model,
                    optimizer=optimizer,
                    loss=loss,
                    feature_cols=['features'],
                    label_cols=['y'],
                    batch_size=1,
                    epochs=3,
                    verbose=2)

                keras_model = keras_estimator.fit(df)

                trained_model = keras_model.getModel()
                pred = trained_model.predict([np.ones([1, 2], dtype=np.float32)])
                assert len(pred) == 1
                assert pred.dtype == np.float32
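The Keras examples use a Keras flavour of create_xor_model. A minimal sketch of what it is assumed to look like, again inferred from usage rather than taken from Horovod's test suite:

# Hypothetical Keras counterpart of create_xor_model (inferred from usage above).
import tensorflow as tf


def create_xor_model():
    # Two inputs, one sigmoid output, suitable for the 'binary_crossentropy' loss.
    return tf.keras.Sequential([
        tf.keras.layers.Dense(8, input_dim=2, activation='tanh'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])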
Example #5
    def test_keras_direct_parquet_train(self, mock_fit_fn, mock_pin_gpu_fn):
        mock_fit_fn.return_value = get_mock_fit_fn()
        mock_pin_gpu_fn.return_value = mock.Mock()

        with spark_session('test_keras_direct_parquet_train') as spark:
            df = create_xor_data(spark)

            backend = CallbackBackend()
            with local_store() as store:
                store.get_train_data_path = lambda v=None: store._train_path
                store.get_val_data_path = lambda v=None: store._val_path

                with util.prepare_data(backend.num_processes(),
                                       store,
                                       df,
                                       feature_columns=['features'],
                                       label_columns=['y']):
                    model = create_xor_model()
                    optimizer = tf.keras.optimizers.SGD(lr=0.1)
                    loss = 'binary_crossentropy'

                    est = hvd.KerasEstimator(backend=backend,
                                             store=store,
                                             model=model,
                                             optimizer=optimizer,
                                             loss=loss,
                                             feature_cols=['features'],
                                             label_cols=['y'],
                                             batch_size=1,
                                             epochs=3,
                                             verbose=2)

                    transformer = est.fit_on_parquet()
                    predictions = transformer.transform(df)
                    assert predictions.count() == df.count()
Example #6
    def test_transform_multi_class(self):
        model = create_xor_model(output_dim=2)

        with spark_session('test_transform_multi_class') as spark:
            df = create_xor_data(spark)
            metadata = util._get_metadata(df)

            torch_model = hvd_spark.TorchModel(history=None,
                                               model=model,
                                               input_shapes=[[2]],
                                               feature_columns=['features'],
                                               label_columns=['y'],
                                               _metadata=metadata)
            out_df = torch_model.transform(df)

            expected_types = {
                'x1': LongType,
                'x2': LongType,
                'features': VectorUDT,
                'weight': DoubleType,
                'y': DoubleType,
                'y__output': VectorUDT
            }

            for field in out_df.schema.fields:
                assert type(field.dataType) == expected_types[field.name]
Example #7
    def test_fit_model(self):
        if sys.version_info < (3, 0, 0) and is_gloo_used():
            self.skipTest(
                'Horovod on Spark over Gloo only supported on Python3')

        model = create_xor_model()
        optimizer = tf.keras.optimizers.SGD(lr=0.1)
        loss = 'binary_crossentropy'

        with spark_session('test_fit_model') as spark:
            df = create_xor_data(spark)

            with local_store() as store:
                keras_estimator = hvd.KerasEstimator(num_proc=2,
                                                     store=store,
                                                     model=model,
                                                     optimizer=optimizer,
                                                     loss=loss,
                                                     feature_cols=['features'],
                                                     label_cols=['y'],
                                                     batch_size=1,
                                                     epochs=3,
                                                     verbose=2)

                keras_model = keras_estimator.fit(df)

                trained_model = keras_model.getModel()
                pred = trained_model.predict(
                    [np.ones([1, 2], dtype=np.float32)])
                assert len(pred) == 1
                assert pred.dtype == np.float32
Example #8
    def test_fit_model(self):
        if sys.version_info < (3, 0, 0) and is_gloo_used():
            self.skipTest(
                'Horovod on Spark over Gloo only supported on Python3')

        model = create_xor_model()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        loss = F.binary_cross_entropy

        with spark_session('test_fit_model') as spark:
            df = create_xor_data(spark)

            with local_store() as store:
                torch_estimator = hvd.TorchEstimator(
                    num_proc=2,
                    store=store,
                    model=model,
                    optimizer=optimizer,
                    loss=loss,
                    input_shapes=[[2]],
                    feature_cols=['features'],
                    label_cols=['y'],
                    batch_size=1,
                    epochs=3,
                    verbose=2,
                    sample_weight_col='weight')

                torch_model = torch_estimator.fit(df)

                trained_model = torch_model.getModel()
                pred = trained_model(torch.ones([1, 2], dtype=torch.int32))
                assert len(pred) == 1
                assert pred.dtype == torch.float32
Example #9
    def test_prepare_data(self):
        with spark_session('test_prepare_data') as spark:
            df = create_xor_data(spark)

            train_rows = df.count()
            schema_cols = ['features', 'y']
            metadata = util._get_metadata(df)
            assert metadata['features']['intermediate_format'] == constants.ARRAY

            to_petastorm = util.to_petastorm_fn(schema_cols, metadata)
            modified_df = df.rdd.map(to_petastorm).toDF()
            data = modified_df.collect()

            prepare_data = remote._prepare_data_fn(metadata)
            features = torch.tensor([data[i].features for i in range(train_rows)])
            features_prepared = prepare_data('features', features)
            assert np.array_equal(features_prepared, features)
Example #10
    def test_restore_from_checkpoint(self, mock_fit_fn, mock_pin_gpu_fn):
        mock_fit_fn.return_value = get_mock_fit_fn()
        mock_pin_gpu_fn.return_value = mock.Mock()

        model = create_xor_model()
        optimizer = tf.keras.optimizers.SGD(lr=0.1)
        loss = 'binary_crossentropy'

        with spark_session('test_restore_from_checkpoint') as spark:
            df = create_xor_data(spark)

            backend = CallbackBackend()

            run_id = 'run01'
            with local_store() as store:
                keras_estimator = hvd.KerasEstimator(
                    backend=backend,
                    store=store,
                    model=model,
                    optimizer=optimizer,
                    loss=loss,
                    feature_cols=['features'],
                    label_cols=['y'],
                    batch_size=1,
                    epochs=3,
                    verbose=2,
                    run_id=run_id)

                keras_estimator._load_model_from_checkpoint = mock.Mock(
                    side_effect=keras_estimator._load_model_from_checkpoint)

                ckpt_path = store.get_checkpoint_path(run_id)
                assert not store.exists(ckpt_path)
                keras_estimator._load_model_from_checkpoint.assert_not_called()
                keras_model = keras_estimator.fit(df)

                trained_model = keras_model.getModel()
                pred = trained_model.predict([np.ones([1, 2], dtype=np.float64)])
                assert len(pred) == 1

                assert store.exists(ckpt_path)

                keras_estimator.fit(df)
                keras_estimator._load_model_from_checkpoint.assert_called()
Example #11
    def test_model_serialization(self, mock_remote_trainer):
        model = create_xor_model()
        optimizer = tf.keras.optimizers.SGD(lr=0.1)
        loss = 'binary_crossentropy'

        def train(serialized_model, train_rows, val_rows, avg_row_size):
            return None, serialized_model, 2

        mock_remote_trainer.return_value = train

        with spark_session('test_model_serialization') as spark:
            df = create_xor_data(spark)

            keras_estimator = hvd.KerasEstimator(model=model,
                                                 optimizer=optimizer,
                                                 loss=loss,
                                                 feature_cols=['features'],
                                                 label_cols=['y'],
                                                 batch_size=1,
                                                 epochs=3,
                                                 verbose=2)

            backend = CallbackBackend()
            with local_store() as store:
                with temppath() as saved_path:
                    keras_estimator.save(saved_path)
                    keras_estimator_loaded = hvd.KerasEstimator.load(
                        saved_path)

                keras_model = keras_estimator_loaded.fit(
                    df,
                    params={
                        keras_estimator_loaded.backend: backend,
                        keras_estimator_loaded.store: store
                    })

                trained_model = keras_model.getModel()
                pred = trained_model.predict(
                    [np.ones([1, 2], dtype=np.float32)])
                assert len(pred) == 1
                assert pred.dtype == np.float32
Example #12
    def test_restore_from_checkpoint(self):
        model = create_xor_model()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        loss = nn.BCELoss()

        with spark_session('test_restore_from_checkpoint') as spark:
            df = create_xor_data(spark)

            ctx = CallbackBackend()

            run_id = 'run01'
            with local_store() as store:
                torch_estimator = hvd_spark.TorchEstimator(
                    backend=ctx,
                    store=store,
                    model=model,
                    optimizer=optimizer,
                    loss=loss,
                    input_shapes=[[2]],
                    feature_cols=['features'],
                    label_cols=['y'],
                    batch_size=1,
                    epochs=1,
                    verbose=2,
                    run_id=run_id)

                torch_estimator._load_checkpoint = mock.Mock(
                    side_effect=torch_estimator._load_checkpoint)

                ckpt_path = store.get_checkpoint_path(run_id)
                assert not store.exists(ckpt_path)
                torch_estimator._load_checkpoint.assert_not_called()
                torch_estimator.fit(df)

                assert store.exists(ckpt_path)
                torch_estimator.fit(df)
                torch_estimator._load_checkpoint.assert_called()
Example #13
    def test_fit_model(self):
        model = create_xor_model()
        optimizer = tf.keras.optimizers.SGD(lr=0.1)
        loss = 'binary_crossentropy'

        with spark_session('test_fit_model') as spark:
            df = create_xor_data(spark)

            with local_store() as store:
                keras_estimator = hvd.KerasEstimator(num_proc=2,
                                                     store=store,
                                                     model=model,
                                                     optimizer=optimizer,
                                                     loss=loss,
                                                     feature_cols=['features'],
                                                     label_cols=['y'],
                                                     batch_size=1,
                                                     random_seed=1,
                                                     epochs=3,
                                                     verbose=2,
                                                     use_gpu=False,
                                                     mp_start_method='spawn')

                assert not keras_estimator.getUseGpu()
                assert 'spawn' == keras_estimator.getMpStartMethod()

                keras_estimator.setMpStartMethod('forkserver')
                assert 'forkserver' == keras_estimator.getMpStartMethod()

                keras_model = keras_estimator.fit(df)

                trained_model = keras_model.getModel()
                pred = trained_model.predict(
                    [np.ones([1, 2], dtype=np.float32)])
                assert len(pred) == 1
                assert pred.dtype == np.float32
Example #14
    def test_df_cache(self):
        # Clean the cache before starting the test
        util.clear_training_cache()
        util._training_cache.get_dataset = mock.Mock(
            side_effect=util._training_cache.get_dataset)

        with spark_session('test_df_cache') as spark:
            with local_store() as store:
                df = create_xor_data(spark)
                df2 = create_xor_data(spark)
                df3 = create_xor_data(spark)

                key = util._training_cache.create_key(df, store, None)
                key2 = util._training_cache.create_key(df2, store, None)
                key3 = util._training_cache.create_key(df3, store, None)

                # All keys are distinct
                assert key != key2
                assert key != key3
                assert key2 != key3

                # The cache should be empty to start
                assert not util._training_cache.is_cached(key, store)
                assert not util._training_cache.is_cached(key2, store)
                assert not util._training_cache.is_cached(key3, store)

                # First insertion into the cache
                with util.prepare_data(num_processes=2,
                                       store=store,
                                       df=df,
                                       feature_columns=['features'],
                                       label_columns=['y']) as dataset_idx:
                    train_rows, val_rows, metadata, avg_row_size = util.get_dataset_properties(
                        dataset_idx)
                    util._training_cache.get_dataset.assert_not_called()
                    assert len(util._training_cache._key_to_dataset) == 1
                    assert util._training_cache.is_cached(key, store)
                    assert dataset_idx == 0

                    # The first dataset is still in use, so the next integer in
                    # sequence is assigned to this second dataset.
                    assert not util._training_cache.is_cached(key2, store)
                    with util.prepare_data(num_processes=2,
                                           store=store,
                                           df=df2,
                                           feature_columns=['features'],
                                           label_columns=['y']) as dataset_idx2:
                        util._training_cache.get_dataset.assert_not_called()
                        assert len(util._training_cache._key_to_dataset) == 2
                        assert util._training_cache.is_cached(key2, store)
                        assert dataset_idx2 == 1

                # Even though the first dataset is no longer in use, it is still cached
                with util.prepare_data(num_processes=2,
                                       store=store,
                                       df=df,
                                       feature_columns=['features'],
                                       label_columns=['y']) as dataset_idx1:
                    train_rows1, val_rows1, metadata1, avg_row_size1 = util.get_dataset_properties(
                        dataset_idx1)
                    util._training_cache.get_dataset.assert_called()
                    assert train_rows == train_rows1
                    assert val_rows == val_rows1
                    assert metadata == metadata1
                    assert avg_row_size == avg_row_size1
                    assert dataset_idx1 == 0

                # The first dataset is no longer in use, so we can reclaim its dataset index
                assert not util._training_cache.is_cached(key3, store)
                with util.prepare_data(num_processes=2,
                                       store=store,
                                       df=df3,
                                       feature_columns=['features'],
                                       label_columns=['y']) as dataset_idx3:
                    train_rows3, val_rows3, metadata3, avg_row_size3 = util.get_dataset_properties(
                        dataset_idx3)
                    assert train_rows == train_rows3
                    assert val_rows == val_rows3
                    assert metadata == metadata3
                    assert avg_row_size == avg_row_size3
                    assert dataset_idx3 == 0

                # Same dataframe, different validation
                bad_key = util._training_cache.create_key(df, store, 0.1)
                assert not util._training_cache.is_cached(bad_key, store)
Example #15
    def test_keras_model_checkpoint_callback(self, mock_fit_fn, mock_pin_gpu_fn):
        from horovod.tensorflow.keras.callbacks import BestModelCheckpoint

        def _get_mock_fit_fn(checkpoint_callback_provided):
            def fit(model, train_data, val_data, steps_per_epoch, validation_steps, callbacks,
                    verbose):
                returned_model_checkpoint_present = False
                model_checkpoint_present = False
                for callback in callbacks:
                    callback.set_model(model)
                    if checkpoint_callback_provided:
                        callback.on_epoch_end(0, logs={'binary_crossentropy': 0.3})
                    else:
                        callback.on_epoch_end(0, logs={'val_loss': 0.3})

                    if checkpoint_callback_provided and isinstance(callback, BestModelCheckpoint):
                        self.assertIsNotNone(callback.filepath)
                        self.assertTrue(callback.save_best_only)
                        self.assertEqual(callback.monitor, 'binary_crossentropy')
                        returned_model_checkpoint_present = True

                    if not checkpoint_callback_provided and isinstance(callback, tf.keras.callbacks.ModelCheckpoint):
                        self.assertFalse(callback.save_best_only)
                        self.assertEqual(callback.monitor, 'val_loss')
                        model_checkpoint_present = True

                if checkpoint_callback_provided:
                    self.assertTrue(returned_model_checkpoint_present)
                    self.assertFalse(model_checkpoint_present)
                else:
                    self.assertFalse(returned_model_checkpoint_present)
                    self.assertTrue(model_checkpoint_present)

                return mock.Mock()

            return fit

        mock_pin_gpu_fn.return_value = mock.Mock()

        with spark_session('test_keras_model_checkpoint_callbacks') as spark:
            df = create_xor_data(spark)

            backend = CallbackBackend()
            with local_store() as store:
                store.get_train_data_path = lambda v=None: store._train_path
                store.get_val_data_path = lambda v=None: store._val_path

                with util.prepare_data(backend.num_processes(),
                                       store,
                                       df,
                                       feature_columns=['features'],
                                       label_columns=['y']):
                    model = create_xor_model()
                    optimizer = tf.keras.optimizers.SGD(lr=0.1)
                    loss = 'binary_crossentropy'

                    # Test that when no checkpoint callback is provided, the correct default one is created.
                    mock_fit_fn.return_value = _get_mock_fit_fn(checkpoint_callback_provided=False)
                    est = hvd.KerasEstimator(
                        backend=backend,
                        store=store,
                        model=model,
                        optimizer=optimizer,
                        loss=loss,
                        feature_cols=['features'],
                        label_cols=['y'],
                        batch_size=1,
                        epochs=3,
                        verbose=2)

                    transformer = est.fit_on_parquet()
                    predictions = transformer.transform(df)
                    assert predictions.count() == df.count()

                    # Test that a provided checkpoint callback is correctly passed through to the fit function.
                    mock_fit_fn.return_value = _get_mock_fit_fn(checkpoint_callback_provided=True)
                    checkpoint_callback = BestModelCheckpoint(monitor='binary_crossentropy')
                    est = hvd.KerasEstimator(
                        backend=backend,
                        store=store,
                        model=model,
                        optimizer=optimizer,
                        loss=loss,
                        feature_cols=['features'],
                        label_cols=['y'],
                        batch_size=1,
                        epochs=3,
                        verbose=2,
                        checkpoint_callback=checkpoint_callback)

                    transformer = est.fit_on_parquet()
                    predictions = transformer.transform(df)
                    assert predictions.count() == df.count()