示例#1
0
    def test_NNEstimator_works_with_VectorAssembler_multi_input(self):
        """Fit an NNClassifier on VectorAssembler output with a 3-input model.

        The body only runs when the Spark version string starts with "2";
        for any other version the method returns immediately.
        """
        if not self.sc.version.startswith("2"):
            return

        from pyspark.ml.linalg import Vectors
        from pyspark.ml.feature import VectorAssembler
        from pyspark.sql import SparkSession

        session = SparkSession.builder.getOrCreate()

        rows = [
            (1, 35, 109.0, Vectors.dense([2.0, 5.0, 0.5, 0.5]), 1.0),
            (2, 58, 2998.0, Vectors.dense([4.0, 10.0, 0.5, 0.5]), 2.0),
            (3, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0),
        ]
        raw_df = session.createDataFrame(
            rows, ["user", "age", "income", "history", "label"])

        # Pack the four input columns into a single "features" column.
        assembled_df = VectorAssembler(
            inputCols=["user", "age", "income", "history"],
            outputCol="features").transform(raw_df)

        user_in = ZLayer.Input(shape=(1, ))
        dense_in = ZLayer.Input(shape=(2, ))
        seq_in = ZLayer.Input(shape=(2, 2))

        embedded = ZLayer.Embedding(5, 10)(user_in)
        user_branch = ZLayer.Flatten()(embedded)
        dense_branch = ZLayer.Dense(2)(dense_in)
        lstm_branch = ZLayer.LSTM(4, input_shape=(2, 2))(seq_in)

        joined = ZLayer.merge([user_branch, dense_branch, lstm_branch],
                              mode="concat")
        net = ZModel([user_in, dense_in, seq_in], ZLayer.Dense(2)(joined))

        # The size list mirrors the three Input shapes above; presumably it
        # tells the classifier how to split the assembled vector -- confirm.
        classifier = NNClassifier(net, ClassNLLCriterion(),
                                  [[1], [2], [2, 2]])
        classifier = classifier.setOptimMethod(Adam())
        classifier = classifier.setLearningRate(0.1)
        classifier = classifier.setBatchSize(2)
        classifier = classifier.setMaxEpoch(10)

        fitted = classifier.fit(assembled_df)
        print(fitted.getBatchSize())
        # Collect to force the transform to actually execute.
        fitted.transform(assembled_df).collect()
示例#2
0
    def test_merge_method_sum(self):
        """Compare Zoo and Keras two-input graphs merged with mode="sum"."""
        z_in_a = ZLayer.Input(shape=(8, ))
        z_in_b = ZLayer.Input(shape=(6, ))
        z_branches = [ZLayer.Dense(10)(z_in_a), ZLayer.Dense(10)(z_in_b)]
        z_sum = ZLayer.merge(z_branches, mode="sum")
        zoo_graph = ZModel([z_in_a, z_in_b], z_sum, name="graph1")

        k_in_a = KLayer.Input(shape=(8, ))
        k_in_b = KLayer.Input(shape=(6, ))
        k_branches = [KLayer.Dense(10)(k_in_a), KLayer.Dense(10)(k_in_b)]
        k_sum = kmerge(k_branches, mode="sum")
        keras_graph = KModel([k_in_a, k_in_b], k_sum)

        # One random batch of two samples for each of the two inputs.
        batch = [np.random.random([2, 8]), np.random.random([2, 6])]
        self.compare_layer(keras_graph, zoo_graph, batch,
                           self.convert_two_dense)
    def test_nnEstimator_multiInput_cols(self):
        """Fit and predict a 3-input model using explicit feature columns.

        Builds a four-row DataFrame, trains an ``Estimator.from_bigdl``
        estimator for one epoch, and checks that ``predict`` returns an
        object whose type is named ``DataFrame``.
        """
        from pyspark.ml.linalg import Vectors
        from pyspark.sql import SparkSession

        session = SparkSession.builder.getOrCreate()
        feature_names = ("user", "age", "income", "history")

        rows = [
            (1, 35, 109.0, Vectors.dense([2.0, 5.0, 0.5, 0.5]), 1.0),
            (2, 58, 2998.0, Vectors.dense([4.0, 10.0, 0.5, 0.5]), 2.0),
            (3, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0),
            (4, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0),
        ]
        frame = session.createDataFrame(rows, list(feature_names) + ["label"])

        user_in = ZLayer.Input(shape=(1, ))
        dense_in = ZLayer.Input(shape=(2, ))
        seq_in = ZLayer.Input(shape=(2, 2))

        embedded = ZLayer.Embedding(5, 10)(user_in)
        user_branch = ZLayer.Flatten()(embedded)
        dense_branch = ZLayer.Dense(2)(dense_in)
        lstm_branch = ZLayer.LSTM(4, input_shape=(2, 2))(seq_in)

        joined = ZLayer.merge([user_branch, dense_branch, lstm_branch],
                              mode="concat")
        net = ZModel([user_in, dense_in, seq_in], ZLayer.Dense(2)(joined))

        estimator = Estimator.from_bigdl(
            model=net,
            loss=ClassNLLCriterion(),
            optimizer=Adam(learningrate=0.1),
            feature_preprocessing=[[1], [2], [2, 2]])
        # Pass a fresh list on each call, as the original literals did.
        estimator.fit(frame,
                      epochs=1,
                      batch_size=4,
                      feature_cols=list(feature_names))

        predictions = estimator.predict(frame,
                                        feature_cols=list(feature_names))
        # Collect to force the prediction job to run.
        predictions.collect()
        assert type(predictions).__name__ == 'DataFrame'
示例#4
0
 def test_load(self):
     """Save a one-Dense graph, reload it with Net.load, and compare both."""
     graph_in = ZLayer.Input(shape=(5, ))
     original = ZModel(graph_in, ZLayer.Dense(10)(graph_in), name="graph1")
     save_dir = create_tmp_path()
     # NOTE(review): the trailing True presumably allows overwriting -- confirm.
     original.saveModel(save_dir, None, True)
     restored = Net.load(save_dir)
     self.compare_output_and_grad_input(original, restored,
                                        np.random.random([3, 5]))
示例#5
0
    def test_merge_method_model_concat(self):
        """Compare Zoo and Keras graphs concatenating a nested Model branch."""
        z_left = ZLayer.Input(shape=(4, ))
        z_right = ZLayer.Input(shape=(5, ))
        z_hidden = ZLayer.Dense(6, activation="sigmoid")(z_left)
        # The first branch is wrapped in its own Model and applied as a layer.
        z_nested = ZModel(z_left, z_hidden)
        z_branches = [z_nested(z_left), ZLayer.Dense(8)(z_right)]
        z_concat = ZLayer.merge(z_branches, mode="concat")
        zoo_graph = ZModel([z_left, z_right], z_concat)

        k_left = KLayer.Input(shape=(4, ))
        k_right = KLayer.Input(shape=(5, ))
        k_hidden = KLayer.Dense(6, activation="sigmoid")(k_left)
        k_nested = KModel(k_left, k_hidden)
        k_branches = [k_nested(k_left), KLayer.Dense(8)(k_right)]
        k_concat = KLayer.merge(k_branches, mode="concat")
        keras_graph = KModel([k_left, k_right], k_concat)

        # One random batch of two samples for each of the two inputs.
        batch = [np.random.random([2, 4]), np.random.random([2, 5])]
        self.compare_layer(keras_graph, zoo_graph, batch,
                           self.convert_two_dense)
示例#6
0
    def _to_tensor(self):
        """Apply a new Dense layer to the first model input's ``zvalue``.

        The Dense layer's unit count is the length of
        ``self.model_trainable_values[1]``.

        Raises:
            AssertionError: if the first input's ``zvalue`` is not 2-D.
            Exception: if the ONNX attribute ``transA`` is present and truthy.
        """
        first_input = self.model_inputs[0]
        second_value = self.model_trainable_values[1]
        assert len(first_input.zvalue.shape) == 2, "we only accept 2D input"

        attrs = self.onnx_attr
        wants_transposed_a = "transA" in attrs and attrs['transA']
        if wants_transposed_a:
            # TODO: add transpose operator for this x = x.transpose()
            raise Exception("we don't support this for now")
        return zlayers.Dense(len(second_value))(first_input.zvalue)
示例#7
0
 def test_regularizer(self):
     """Build, summarize and compile a Sequential with an L2-regularized Dense."""
     net = ZSequential()
     regularized_dense = ZLayer.Dense(
         16,
         W_regularizer=regularizers.l2(0.001),
         activation='relu',
         input_shape=(10000, ))
     net.add(regularized_dense)
     net.summary()

     compile_options = dict(optimizer='rmsprop',
                            loss='binary_crossentropy',
                            metrics=['acc'])
     net.compile(**compile_options)
示例#8
0
 def test_save_load_Sequential(self):
     """Round-trip a Sequential via saveModel/Net.load, then compile and fit."""
     seq = ZSequential()
     seq.add(ZLayer.Dense(10, input_dim=5))
     save_dir = create_tmp_path()
     seq.saveModel(save_dir, None, True)
     restored = Net.load(save_dir)

     # Random regression data: 10 samples, 5 features in, 10 values out.
     features = np.random.random([10, 5])
     targets = np.random.random([10, 10])
     restored.compile(optimizer="adam", loss="mse")
     restored.fit(x=features, y=targets, batch_size=8, nb_epoch=1)
示例#9
0
 def test_save_load_Model(self):
     """Round-trip a graph Model via saveModel/Net.load, then compile and fit."""
     graph_in = ZLayer.Input(shape=(5, ))
     graph = ZModel(graph_in, ZLayer.Dense(10)(graph_in), name="graph1")
     save_dir = create_tmp_path()
     graph.saveModel(save_dir, None, True)
     restored = Net.load(save_dir)

     # Random regression data: 10 samples, 5 features in, 10 values out.
     features = np.random.random([10, 5])
     targets = np.random.random([10, 10])
     restored.compile(optimizer="adam", loss="mse")
     restored.fit(x=features, y=targets, batch_size=8, nb_epoch=2)
示例#10
0
 def create_operator(self):
     """Create the Dense layer that stands in for this Gemm node.

     The layer's unit count is the length of ``self.params[1]``. The layer
     is returned without being applied to any input.

     Returns:
         A ``zlayers.Dense`` layer.

     Raises:
         AssertionError: if there is not exactly one input, or the input
             shape reported by that input is not 2-D.
         Exception: if the ONNX attribute ``transA`` is present and truthy.
     """
     assert len(self.inputs) == 1, "Gemm accept single input only"
     input_shape = self.inputs[0].get_input_shape()
     assert len(input_shape) == 2, "we only accept 2D input"
     # Read before the transA check so a missing parameter still fails first.
     second_param = self.params[1]
     if "transA" in self.onnx_attr and self.onnx_attr['transA']:
         # TODO: add transpose operator for this x = x.transpose()
         raise Exception("we don't support this for now")
     return zlayers.Dense(len(second_param))
示例#11
0
    def test_NNEstimator_multi_input(self):
        """Fit an NNEstimator on a two-input concat model and run transform."""
        first_in = ZLayer.Input(shape=(1, ))
        second_in = ZLayer.Input(shape=(1, ))
        concatenated = ZLayer.merge([first_in, second_in], mode="concat")
        net = ZModel([first_in, second_in], ZLayer.Dense(2)(concatenated))

        loss = MSECriterion()
        frame = self.get_estimator_df()
        estimator = NNEstimator(net, loss, [[1], [1]])
        estimator = estimator.setMaxEpoch(5)
        estimator = estimator.setBatchSize(4)

        fitted = estimator.fit(frame)
        # Collect to force the transform to actually execute.
        fitted.transform(frame).collect()
示例#12
0
    def test_merge_method_seq_concat(self):
        """Compare Zoo and Keras graphs concatenating a Model and a Sequential."""
        z_in_a = ZLayer.Input(shape=(10, ))
        z_in_b = ZLayer.Input(shape=(10, ))
        z_hidden = ZLayer.Dense(12, activation="sigmoid")(z_in_a)
        z_model_node = ZModel(z_in_a, z_hidden)(z_in_a)
        # The second branch is a one-layer Sequential applied to the other input.
        z_seq = ZSequential()
        z_seq.add(ZLayer.Dense(12, input_dim=10))
        z_seq_node = z_seq(z_in_b)
        z_concat = ZLayer.merge([z_model_node, z_seq_node], mode="concat")
        zoo_graph = ZModel([z_in_a, z_in_b], z_concat)

        k_in_a = KLayer.Input(shape=(10, ))
        k_in_b = KLayer.Input(shape=(10, ))
        k_hidden = KLayer.Dense(12, activation="sigmoid")(k_in_a)
        k_model_node = KModel(k_in_a, k_hidden)(k_in_a)
        k_seq = KSequential()
        k_seq.add(KLayer.Dense(12, input_dim=10))
        k_seq_node = k_seq(k_in_b)
        k_concat = KLayer.merge([k_model_node, k_seq_node], mode="concat")
        keras_graph = KModel([k_in_a, k_in_b], k_concat)

        # One random batch of two samples for each of the two inputs.
        batch = [np.random.random([2, 10]), np.random.random([2, 10])]
        self.compare_layer(keras_graph, zoo_graph, batch,
                           self.convert_two_dense)
    def test_nnEstimator_multiInput(self):
        """Fit and predict a two-input concat model via Estimator.from_bigdl.

        Checks that ``predict`` returns an object whose type is named
        ``DataFrame``.
        """
        first_in = ZLayer.Input(shape=(1, ))
        second_in = ZLayer.Input(shape=(1, ))
        concatenated = ZLayer.merge([first_in, second_in], mode="concat")
        net = ZModel([first_in, second_in], ZLayer.Dense(2)(concatenated))

        loss = MSECriterion()
        frame = self.get_estimator_df()
        estimator = Estimator.from_bigdl(
            model=net,
            loss=loss,
            feature_preprocessing=[[1], [1]])
        estimator.fit(frame, epochs=5, batch_size=4)

        predictions = estimator.predict(frame)
        # Collect to force the prediction job to run.
        predictions.collect()
        assert type(predictions).__name__ == 'DataFrame'
示例#14
0
    def test_xshards_spark_estimator_multi_inputs(self):
        """Run predict, fit, save and evaluate on sharded two-input CSV data.

        The CSV is read relative to this file, each shard is converted to an
        ``{"x": [...], "y": ...}`` dict, and a two-input concat model is
        driven through the estimator API inside a temporary model directory.
        """
        csv_path = os.path.join(os.path.dirname(__file__),
                                "../../../resources",
                                "orca/learn/ncf2.csv")

        def to_features_and_label(df):
            # Each feature column gets an extra axis at position 1 so the two
            # columns can be fed as separate model inputs.
            features = [
                np.expand_dims(df[column].to_numpy(), axis=1)
                for column in ('user', 'item')
            ]
            return {"x": features, "y": df['label'].to_numpy()}

        shards = read_csv(csv_path).transform_shard(to_features_and_label)

        first_in = ZLayer.Input(shape=(1, ))
        second_in = ZLayer.Input(shape=(1, ))
        concatenated = ZLayer.merge([first_in, second_in], mode="concat")
        net = ZModel([first_in, second_in], ZLayer.Dense(2)(concatenated))

        sgd = SGD(learningrate=0.01)
        with tempfile.TemporaryDirectory() as workdir:
            estimator = Estimator.from_bigdl(
                model=net,
                optimizer=sgd,
                loss=ClassNLLCriterion(),
                metrics=[Accuracy()],
                model_dir=workdir)
            estimator.set_constant_gradient_clipping(0.1, 1.2)

            # Predict once before any training, collecting to force execution.
            estimator.predict(data=shards).collect()

            estimator.set_tensorboard(log_dir=workdir, app_name="test")
            estimator.fit(data=shards,
                          epochs=5,
                          batch_size=8,
                          validation_data=shards,
                          checkpoint_trigger=EveryEpoch())
            estimator.get_train_summary(tag="Loss")

            estimator.save(os.path.join(workdir, "save_model"))
            estimator.evaluate(data=shards, batch_size=8)
示例#15
0
 def test_deprecated_save(self):
     """Check that calling the deprecated ``save`` method raises.

     The model and the target path are built outside the ``pytest.raises``
     block so that only a failure of ``save`` itself can satisfy the
     expectation; an error during setup now fails the test instead of
     making it pass.
     """
     graph_in = ZLayer.Input(shape=(5, ))
     graph_out = ZLayer.Dense(10)(graph_in)
     zmodel = ZModel(graph_in, graph_out, name="graph1")
     target_path = create_tmp_path()
     with pytest.raises(Exception):
         zmodel.save(target_path)