Example #1
    def test_dataframe(self):
        # Assumed imports for this test (module paths vary across
        # Analytics Zoo / BigDL Orca releases, so these are a guess):
        #   import numpy as np
        #   from zoo import init_nncontext
        #   from zoo.orca.learn.tf2 import Estimator

        sc = init_nncontext()
        rdd = sc.range(0, 10)
        from pyspark.sql import SparkSession
        spark = SparkSession(sc)
        from pyspark.ml.linalg import DenseVector
        # Build a 10-row DataFrame of random 1-d features and integer labels.
        # Note: np.random.randint(0, 1) always returns 0, so every label is 0.
        # np.float was removed in NumPy >= 1.24; use np.float64 instead.
        df = rdd.map(lambda x:
                     (DenseVector(np.random.randn(1).astype(np.float64)),
                      int(np.random.randint(0, 1, size=())))).toDF(
                          ["feature", "label"])

        config = {"batch_size": 4, "lr": 0.8}
        trainer = Estimator(model_creator=model_creator,
                            verbose=True,
                            config=config,
                            workers_per_node=2)

        trainer.fit(df,
                    epochs=1,
                    steps_per_epoch=25,
                    feature_cols=["feature"],
                    label_cols=["label"])
        trainer.evaluate(df,
                         steps=25,
                         feature_cols=["feature"],
                         label_cols=["label"])
        trainer.predict(df, feature_cols=["feature"]).collect()
Example #2
    def test_predict_xshards(self):
        # Partition a dict of NumPy arrays into distributed XShards.
        # Note: np.random.randint(0, 1) always returns 0, so "y" is all zeros.
        train_data_shard = XShards.partition({
            "x": np.random.randn(100, 1),
            "y": np.random.randint(0, 1, size=(100,))
        })
        # Gather the "x" arrays back from every shard as the expected output.
        expected = train_data_shard.collect()
        expected = [shard["x"] for shard in expected]

        # Debug aid: show the per-shard shapes.
        for x in expected:
            print(x.shape)

        expected = np.concatenate(expected)

        config = {}
        trainer = Estimator(model_creator=identity_model_creator,
                            verbose=True,
                            config=config,
                            workers_per_node=2)

        # Predict on the XShards; each result shard carries a "prediction" key.
        result = trainer.predict(train_data_shard, batch_size=10).collect()
        result = [shard["prediction"] for shard in result]
        result = np.concatenate(result)

        # An identity model must return its inputs unchanged.
        assert np.allclose(expected, result)
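
`identity_model_creator` is likewise not defined here. For the final `np.allclose` assertion to hold, the model must output its inputs unchanged; a minimal sketch under that assumption, again with a TF2/Keras backend (the initializer trick is illustrative, not the source's implementation):

    # Hypothetical identity_model_creator sketch; an assumption, not from the source.
    def identity_model_creator(config):
        import tensorflow as tf
        inputs = tf.keras.Input(shape=(1,))
        # A 1->1 Dense layer with its single weight fixed to 1 and no bias:
        # since no training step runs before predict, outputs equal inputs.
        outputs = tf.keras.layers.Dense(
            1, kernel_initializer="ones", use_bias=False)(inputs)
        model = tf.keras.Model(inputs, outputs)
        model.compile(optimizer="sgd", loss="mse")
        return model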