Пример #1
0
    def test_filter(self, feature_set_dataframe, spark_context, spark_session):
        """Check the output of a ``filter`` transformation on a FileReader.

        A ``filter`` transformer with a fixed condition string is registered
        on the reader, the reader's transformations are applied to
        ``feature_set_dataframe``, and the collected rows are compared with
        two expected rows (features 110 and 120, both with test == "pass").
        """
        # given
        reader = FileReader("test", "path/to/file", "format")
        reader.with_(
            transformer=filter,
            condition="test not in ('fail') and feature in (110, 120)",
        )

        # when
        filtered_df = reader._apply_transformations(feature_set_dataframe)

        # Expected rows differ only in their timestamp and feature value.
        expected_rows = [
            {"id": 1, TIMESTAMP_COLUMN: ts, "feature": feature, "test": "pass"}
            for ts, feature in ((1, 110), (2, 120))
        ]
        expected_rdd = spark_context.parallelize(expected_rows, 1)
        expected_df = spark_session.read.json(expected_rdd)

        # then
        assert filtered_df.collect() == expected_df.collect()
Пример #2
0
    def test_filter_with_invalidations(self, feature_set_dataframe, condition,
                                       spark_context, spark_session):
        """Check that applying a filter with ``condition`` raises TypeError.

        NOTE(review): ``condition`` is supplied from outside this method,
        presumably by a parametrization or fixture - confirm at the
        definition site.
        """
        # given
        reader = FileReader("test", "path/to/file", "format")
        filter_settings = {"transformer": filter, "condition": condition}
        reader.with_(**filter_settings)

        # when / then: the failure is expected when transformations run
        with pytest.raises(TypeError):
            reader._apply_transformations(feature_set_dataframe)
Пример #3
0
    def test_with_(self, transformations, spark_client):
        """Check that ``with_`` records every registered transformation.

        Each entry of ``transformations`` is a mapping with the keys
        "transformer", "args" and "kwargs". After registering all of them,
        ``file_reader.transformations`` must equal the input list.
        """
        # arrange
        reader = FileReader("test", "path/to/file", "format")

        # act
        for spec in transformations:
            transformer = spec["transformer"]
            positional = spec["args"]
            named = spec["kwargs"]
            reader.with_(transformer, *positional, **named)

        # assert
        assert reader.transformations == transformations
Пример #4
0
    def test_apply_pivot_transformation(self, input_df, pivot_df):
        """Check that a registered ``pivot`` transformation yields ``pivot_df``.

        The reader is configured with a ``pivot`` transformer grouped by
        "id" and "ts", pivoting on "pivot_column" and aggregating
        "has_feature" with ``first``. The transformed ``input_df`` is then
        compared with ``pivot_df`` through ``compare_dataframes``.
        """
        # arrange
        reader = FileReader("test", "path/to/file", "format")
        pivot_settings = {
            "group_by_columns": ["id", "ts"],
            "pivot_column": "pivot_column",
            "agg_column": "has_feature",
            "aggregation": first,
        }
        reader.with_(transformer=pivot, **pivot_settings)

        # act
        pivoted_df = reader._apply_transformations(input_df)

        # assert
        assert compare_dataframes(actual_df=pivoted_df, expected_df=pivot_df)