Пример #1
0
    def test_fit_model(self):
        model = create_xor_model()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        loss = F.binary_cross_entropy

        with spark_session('test_fit_model') as spark:
            df = create_xor_data(spark)

            with local_store() as store:
                torch_estimator = hvd_spark.TorchEstimator(
                    num_proc=2,
                    store=store,
                    model=model,
                    optimizer=optimizer,
                    loss=loss,
                    input_shapes=[[2]],
                    feature_cols=['features'],
                    label_cols=['y'],
                    batch_size=1,
                    epochs=3,
                    random_seed=1,
                    verbose=2,
                    sample_weight_col='weight')

                torch_model = torch_estimator.fit(df)

                trained_model = torch_model.getModel()
                pred = trained_model(torch.ones([1, 2], dtype=torch.int32))
                assert len(pred) == 1
                assert pred.dtype == torch.float32
Пример #2
0
    def test_torch_direct_parquet_train(self):
        with spark_session('test_torch_direct_parquet_train') as spark:
            df = create_xor_data(spark)

            backend = CallbackBackend()
            with local_store() as store:
                store.get_train_data_path = lambda v=None: store._train_path
                store.get_val_data_path = lambda v=None: store._val_path

                with util.prepare_data(backend.num_processes(),
                                       store,
                                       df,
                                       feature_columns=['features'],
                                       label_columns=['y']):
                    model = create_xor_model()
                    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
                    loss = nn.BCELoss()

                    est = hvd_spark.TorchEstimator(backend=backend,
                                                   store=store,
                                                   model=model,
                                                   optimizer=optimizer,
                                                   input_shapes=[[2]],
                                                   feature_cols=['features'],
                                                   label_cols=['y'],
                                                   batch_size=1,
                                                   epochs=3,
                                                   verbose=2)

                    # To make sure that setLoss works with non-list loss.
                    est.setLoss(loss)

                    transformer = est.fit_on_parquet()
                    predictions = transformer.transform(df)
                    assert predictions.count() == df.count()
Пример #3
0
    def test_fit_model(self):
        if sys.version_info < (3, 0, 0) and is_gloo_used():
            self.skipTest(
                'Horovod on Spark over Gloo only supported on Python3')

        model = create_xor_model()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        loss = F.binary_cross_entropy

        with spark_session('test_fit_model') as spark:
            df = create_xor_data(spark)

            with local_store() as store:
                torch_estimator = hvd.TorchEstimator(
                    num_proc=2,
                    store=store,
                    model=model,
                    optimizer=optimizer,
                    loss=loss,
                    input_shapes=[[2]],
                    feature_cols=['features'],
                    label_cols=['y'],
                    batch_size=1,
                    epochs=3,
                    verbose=2,
                    sample_weight_col='weight')

                torch_model = torch_estimator.fit(df)

                trained_model = torch_model.getModel()
                pred = trained_model(torch.ones([1, 2], dtype=torch.int32))
                assert len(pred) == 1
                assert pred.dtype == torch.float32
Пример #4
0
    def test_torch_direct_parquet_train(self):
        with spark_session('test_torch_direct_parquet_train') as spark:
            df = create_xor_data_with_val(spark)

            backend = CallbackBackend()
            with local_store() as store:
                store.get_train_data_path = lambda v=None: store._train_path
                store.get_val_data_path = lambda v=None: store._val_path

                # Make sure we cover validation dataloader as well
                for validation in [None, 'val']:
                    # Need validation ratio to split data
                    with util.prepare_data(backend.num_processes(),
                                           store,
                                           df,
                                           feature_columns=['features'],
                                           label_columns=['y'],
                                           validation=validation):
                        model = create_xor_model()
                        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
                        loss = nn.BCELoss()

                        for inmemory_cache_all in [False, True]:
                            for reader_pool_type in ['process', 'thread']:
                                est = hvd_spark.TorchEstimator(
                                    backend=backend,
                                    store=store,
                                    model=model,
                                    optimizer=optimizer,
                                    input_shapes=[[2]],
                                    feature_cols=['features'],
                                    label_cols=['y'],
                                    batch_size=1,
                                    epochs=3,
                                    verbose=2,
                                    reader_pool_type=reader_pool_type,
                                    inmemory_cache_all=inmemory_cache_all,
                                    validation=validation)

                                # To make sure that setLoss works with non-list loss.
                                est.setLoss(loss)

                                transformer = est.fit_on_parquet()
                                predictions = transformer.transform(df)
                                assert predictions.count() == df.count()
Пример #5
0
    def test_restore_from_checkpoint(self):
        model = create_xor_model()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        loss = nn.BCELoss()

        with spark_session('test_restore_from_checkpoint') as spark:
            df = create_xor_data(spark)

            ctx = CallbackBackend()

            run_id = 'run01'
            with local_store() as store:
                torch_estimator = hvd_spark.TorchEstimator(
                    backend=ctx,
                    store=store,
                    model=model,
                    optimizer=optimizer,
                    loss=loss,
                    input_shapes=[[2]],
                    feature_cols=['features'],
                    label_cols=['y'],
                    batch_size=1,
                    epochs=1,
                    verbose=2,
                    run_id=run_id)

                torch_estimator._load_checkpoint = mock.Mock(
                    side_effect=torch_estimator._load_checkpoint)

                ckpt_path = store.get_checkpoint_path(run_id)
                assert not store.exists(ckpt_path)
                torch_estimator._load_checkpoint.assert_not_called()
                torch_estimator.fit(df)

                assert store.exists(ckpt_path)
                torch_estimator.fit(df)
                torch_estimator._load_checkpoint.assert_called()
Пример #6
0
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)


model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
loss = nn.NLLLoss()

# Train a Horovod Spark Estimator on the DataFrame
torch_estimator = hvd.TorchEstimator(
    num_proc=args.num_proc,
    store=store,
    model=model,
    optimizer=optimizer,
    loss=lambda input, target: loss(input, target.long()),
    input_shapes=[[-1, 1, 28, 28]],
    feature_cols=['features'],
    label_cols=['label'],
    batch_size=args.batch_size,
    epochs=args.epochs,
    verbose=1)

torch_model = torch_estimator.fit(train_df).setOutputCols(['label_prob'])

# Evaluate the model on the held-out test DataFrame
pred_df = torch_model.transform(test_df)
argmax = udf(lambda v: float(np.argmax(v)), returnType=T.DoubleType())
pred_df = pred_df.withColumn('label_pred', argmax(pred_df.label_prob))
evaluator = MulticlassClassificationEvaluator(predictionCol='label_pred',
                                              labelCol='label',
                                              metricName='accuracy')