Пример #1
0
    def test_feature_transform(self, spark_context, spark_session):
        """Check a KeyFeature built on ``StackTransform("id_*")``.

        The feature is applied to a dataframe built from ``self.input_data``
        and the result is compared with a hand-written expected dataframe
        of four rows.

        NOTE(review): ``self.input_data`` is defined outside this method;
        presumably it holds the two source rows carrying ``id_a``/``id_b``
        -- confirm against the class setup.
        """
        # arrange
        # Expected rows, one tuple per row: (id, feature, id_a, id_b).
        expected_rows = [
            {"id": row_id, "feature": feature, "id_a": id_a, "id_b": id_b}
            for row_id, feature, id_a, id_b in (
                (1, 100, 1, 2),
                (2, 100, 1, 2),
                (3, 120, 3, 4),
                (4, 120, 3, 4),
            )
        ]
        source_df = create_df_from_collection(
            self.input_data, spark_context, spark_session
        )
        expected_df = create_df_from_collection(
            expected_rows, spark_context, spark_session
        )

        # Columns are selected through the "id_*" pattern rather than
        # being listed one by one.
        stack_transform = StackTransform("id_*")
        stacked_id_feature = KeyFeature(
            name="id",
            description="id_a and id_b stacked in a single column.",
            dtype=DataType.INTEGER,
            transformation=stack_transform,
        )

        # act
        actual_df = stacked_id_feature.transform(source_df)

        # assert
        assert_dataframe_equality(expected_df, actual_df)
Пример #2
0
    def test_with_stack(self, h3_input_df, h3_with_stack_target_df):
        """Check a KeyFeature using ``H3HashTransform(...).with_stack()``.

        The feature is applied to ``h3_input_df`` and the output must equal
        ``h3_with_stack_target_df``.

        NOTE(review): both arguments are presumably pytest fixtures defined
        elsewhere (e.g. a conftest) -- confirm their contents there.
        """
        # arrange
        # Resolutions 6 through 12 inclusive.
        resolutions = list(range(6, 13))
        stacked_h3_transform = H3HashTransform(
            h3_resolutions=resolutions,
            lat_column="lat",
            lng_column="lng",
        ).with_stack()
        h3_key_feature = KeyFeature(
            name="id",
            description="unit test",
            dtype=DataType.STRING,
            transformation=stacked_h3_transform,
        )

        # act
        actual_df = h3_key_feature.transform(h3_input_df)

        # assert
        assert_dataframe_equality(h3_with_stack_target_df, actual_df)