def test_openvino_predict_xshards(self):
    self.load_resnet()
    # Build two partitions: one with four copies of the sample input and one
    # with two copies plus an all-zero image appended.
    input_data_list = [np.array([self.input] * 4),
                       np.concatenate([np.array([self.input] * 2),
                                       np.zeros([1, 3, 224, 224])])]
    sc = init_nncontext()
    rdd = sc.parallelize(input_data_list, numSlices=2)
    shards = SparkXShards(rdd)

    def pre_processing(images):
        return {"x": images}

    shards = shards.transform_shard(pre_processing)
    result = self.est.predict(shards)
    result_c = result.collect()
    assert isinstance(result, SparkXShards)
    assert result_c[0]["prediction"].shape == (4, 1000)
    assert result_c[1]["prediction"].shape == (3, 1000)
    assert self.check_result(result_c[0]["prediction"], 4)
    assert self.check_result(result_c[1]["prediction"], 2)
    # The prediction for the appended zero image must not match the reference.
    assert not self.check_result(result_c[1]["prediction"][2:], 1)
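
# Hypothetical sketch of the check_result helper used above; the real
# implementation lives elsewhere in this test class. From its call sites it
# plausibly verifies that the first `count` rows of `result` match a stored
# reference prediction for self.input, which is why the row produced from the
# zero image fails the check. `self.expected_output` is an assumed attribute,
# not part of the original test.
def check_result(self, result, count):
    # All `count` rows came from the same input image, so each should match
    # the single reference prediction vector.
    return all(np.allclose(result[i], self.expected_output)
               for i in range(count))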

def test_nnEstimator(self):
    from bigdl.dllib.nnframes import NNModel
    linear_model = Sequential().add(Linear(2, 2))
    mse_criterion = MSECriterion()
    df, _ = self.get_estimator_df()
    est = Estimator.from_bigdl(model=linear_model, loss=mse_criterion,
                               optimizer=Adam(),
                               feature_preprocessing=SeqToTensor([2]),
                               label_preprocessing=SeqToTensor([2]))
    # predict should also work on an Estimator that has not been fit yet.
    res0 = est.predict(df)
    res0_c = res0.collect()
    est.fit(df, 2, batch_size=4)

    # Predictions from NNModel.transform and Estimator.predict should agree.
    nn_model = NNModel(est.get_model(), feature_preprocessing=SeqToTensor([2]))
    res1 = nn_model.transform(df)
    res2 = est.predict(df)
    res1_c = res1.collect()
    res2_c = res2.collect()
    assert type(res1).__name__ == 'DataFrame'
    assert type(res2).__name__ == 'DataFrame'
    assert len(res1_c) == len(res2_c)
    for idx in range(len(res1_c)):
        assert res1_c[idx]["prediction"] == res2_c[idx]["prediction"]

    # Save, reload into a fresh Estimator, and check predictions are unchanged.
    with tempfile.TemporaryDirectory() as tempdirname:
        temp_path = os.path.join(tempdirname, "model")
        est.save(temp_path)
        est2 = Estimator.from_bigdl(model=linear_model, loss=mse_criterion)
        est2.load(temp_path, optimizer=Adam(), loss=mse_criterion,
                  feature_preprocessing=SeqToTensor([2]),
                  label_preprocessing=SeqToTensor([2]))
    est2.set_constant_gradient_clipping(0.1, 1.2)
    est2.clear_gradient_clipping()
    res3 = est2.predict(df)
    res3_c = res3.collect()
    assert type(res3).__name__ == 'DataFrame'
    assert len(res1_c) == len(res3_c)
    for idx in range(len(res1_c)):
        assert res1_c[idx]["prediction"] == res3_c[idx]["prediction"]
    est2.fit(df, 4, batch_size=4)

    # Build the same data as SparkXShards and verify that predict on XShards
    # matches predict on the DataFrame.
    data = self.sc.parallelize([((2.0, 1.0), (1.0, 2.0)),
                                ((1.0, 2.0), (2.0, 1.0)),
                                ((2.0, 1.0), (1.0, 2.0)),
                                ((1.0, 2.0), (2.0, 1.0))])
    data_shard = SparkXShards(data)
    data_shard = data_shard.transform_shard(
        lambda feature_label_tuple: {
            "x": np.stack([
                np.expand_dims(np.array(feature_label_tuple[0][0]), axis=0),
                np.expand_dims(np.array(feature_label_tuple[0][1]), axis=0)
            ], axis=1),
            "y": np.stack([
                np.expand_dims(np.array(feature_label_tuple[1][0]), axis=0),
                np.expand_dims(np.array(feature_label_tuple[1][1]), axis=0)
            ], axis=1)
        })
    res4 = est.predict(data_shard)
    res4_c = res4.collect()
    assert type(res4).__name__ == 'SparkXShards'
    for idx in range(len(res4_c)):
        assert abs(res4_c[idx]["prediction"][0][0]
                   - res3_c[idx]["prediction"][0]) == 0
        assert abs(res4_c[idx]["prediction"][0][1]
                   - res3_c[idx]["prediction"][1]) == 0

    # Fit on XShards, then check predictions agree across both input types.
    est.fit(data_shard, 1, batch_size=4)
    res5 = est.predict(data_shard)
    res5_c = res5.collect()
    res6 = est.predict(df)
    res6_c = res6.collect()
    for idx in range(len(res5_c)):
        assert abs(res5_c[idx]["prediction"][0][0]
                   - res6_c[idx]["prediction"][0]) == 0
        assert abs(res5_c[idx]["prediction"][0][1]
                   - res6_c[idx]["prediction"][1]) == 0
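
# Hypothetical sketch of the get_estimator_df helper referenced above; the
# real version lives elsewhere in this class. Judging from the XShards data
# built later in test_nnEstimator, it plausibly wraps the same four
# (features, label) pairs in a Spark DataFrame. The "features"/"label" column
# names, the use of self.sqlContext, and the second return value (assumed to
# be a validation DataFrame, discarded by the caller) are all assumptions.
def get_estimator_df(self):
    from pyspark.sql.types import (StructType, StructField,
                                   ArrayType, DoubleType)
    data = self.sc.parallelize([([2.0, 1.0], [1.0, 2.0]),
                                ([1.0, 2.0], [2.0, 1.0]),
                                ([2.0, 1.0], [1.0, 2.0]),
                                ([1.0, 2.0], [2.0, 1.0])])
    schema = StructType([
        StructField("features", ArrayType(DoubleType()), False),
        StructField("label", ArrayType(DoubleType()), False)])
    df = self.sqlContext.createDataFrame(data, schema)
    return df, df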