def test_output_columns(self):
    """Check the column names reported by a feature using an H3 hash transform.

    The transform is configured with resolutions 6 through 12 over the
    ``lat``/``lng`` columns; one output column name is expected per
    resolution.
    """
    # arrange
    hash_transform = H3HashTransform(
        h3_resolutions=[6, 7, 8, 9, 10, 11, 12],
        lat_column="lat",
        lng_column="lng",
    )
    feature = Feature(
        name="new_feature",
        description="unit test",
        dtype=DataType.STRING,
        transformation=hash_transform,
    )
    expected_columns = [
        "lat_lng__h3_hash__6",
        "lat_lng__h3_hash__7",
        "lat_lng__h3_hash__8",
        "lat_lng__h3_hash__9",
        "lat_lng__h3_hash__10",
        "lat_lng__h3_hash__11",
        "lat_lng__h3_hash__12",
    ]

    # act
    actual_columns = feature.get_output_columns()

    # assert
    # Both sides are sorted, so the comparison does not depend on ordering.
    assert sorted(actual_columns) == sorted(expected_columns)
def test_feature_get_output_columns_without_transformations(self):
    """Check that a feature built without a transformation reports only its own name."""
    # arrange
    feature = Feature(
        name="feature",
        from_column="origin",
        description="unit test",
        dtype=DataType.BIGINT,
    )
    expected_columns = [feature.name]

    # act
    actual_columns = feature.get_output_columns()

    # assert
    assert actual_columns == expected_columns
def test__get_features_columns(self):
    """Check that ``FeatureSet._get_features_columns`` returns every feature's columns.

    Each feature's ``get_output_columns`` is replaced by a mock so the
    expected result depends only on the mocked return values, listed in
    the order the features are passed.
    """
    # arrange
    mocked_outputs = (
        ("feature1", ["col_a", "col_b"]),
        ("feature2", ["col_c"]),
        ("feature3", ["col_d"]),
    )
    features = []
    for feature_name, columns in mocked_outputs:
        feature = Feature(feature_name, "description", DataType.FLOAT)
        feature.get_output_columns = Mock(return_value=columns)
        features.append(feature)
    expected_columns = ["col_a", "col_b", "col_c", "col_d"]

    # act
    actual_columns = FeatureSet._get_features_columns(*features)

    # assert
    assert expected_columns == actual_columns
def test_output_columns(self, feature_set_dataframe):
    """Check that a feature using a custom transform reports a list with its own name."""
    # arrange
    custom_transform = CustomTransform(
        transformer=divide,
        column1="feature1",
        column2="feature2",
    )
    feature = Feature(
        name="feature",
        description="unit test",
        dtype=DataType.BIGINT,
        transformation=custom_transform,
    )

    # act
    output_columns = feature.get_output_columns()

    # assert
    assert isinstance(output_columns, list)
    assert output_columns == ["feature"]
def test_feature_get_output_columns_with_transformations(
    self, feature_set_dataframe
):
    """Check that a feature with a transformation reports the transformation's columns.

    The transformation is a mock whose ``output_columns`` attribute is set
    to the fixture dataframe's columns.
    """
    # arrange
    mocked_transformation = Mock(
        output_columns=feature_set_dataframe.columns,
    )
    feature = Feature(
        name="feature",
        from_column="origin",
        description="unit test",
        transformation=mocked_transformation,
        dtype=DataType.BIGINT,
    )

    # act
    output_columns = feature.get_output_columns()

    # assert
    assert output_columns == feature_set_dataframe.columns
def test_output_columns(self):
    """Check the column names reported by a feature using a SQL expression transform.

    The result is compared to the full expected list. A pairwise check
    over ``zip`` stops at the shorter iterable, so it would pass on an
    empty result and ignore any extra columns.
    """
    # arrange
    test_feature = Feature(
        name="feature1_over_feature2",
        description="unit test",
        dtype=DataType.FLOAT,
        transformation=SQLExpressionTransform(expression="feature1/feature2"),
    )
    expected_columns = ["feature1_over_feature2"]

    # act
    df_columns = test_feature.get_output_columns()

    # assert
    # list() keeps the check independent of the concrete sequence type returned.
    assert list(df_columns) == expected_columns
def test_output_columns(self):
    """Check the column names reported by a feature using an aggregated transform.

    One column is expected per aggregation function (``avg`` and
    ``stddev_pop``). The result is compared to the full expected list; a
    pairwise check over ``zip`` stops at the shorter iterable, so a
    missing or extra column would go unnoticed.
    """
    # arrange
    test_feature = Feature(
        name="feature1",
        description="unit test",
        transformation=AggregatedTransform(
            functions=[
                Function(functions.avg, DataType.DOUBLE),
                Function(functions.stddev_pop, DataType.DOUBLE),
            ]
        ),
    )
    expected_columns = ["feature1__avg", "feature1__stddev_pop"]

    # act
    df_columns = test_feature.get_output_columns()

    # assert
    # list() keeps the check independent of the concrete sequence type returned.
    assert list(df_columns) == expected_columns
def test_output_columns(self):
    """Check the column names reported by a windowed Spark function transform.

    One column is expected per window definition (``7 days`` and
    ``2 weeks``). The result is compared to the full expected list; a
    pairwise check over ``zip`` stops at the shorter iterable, so a
    missing or extra column would go unnoticed.
    """
    # arrange
    test_feature = Feature(
        name="feature1",
        description="unit test",
        transformation=SparkFunctionTransform(
            functions=[Function(functions.avg, DataType.DOUBLE)],
        ).with_window(
            partition_by="id",
            mode="fixed_windows",
            window_definition=["7 days", "2 weeks"],
        ),
    )
    expected_columns = [
        "feature1__avg_over_7_days_fixed_windows",
        "feature1__avg_over_2_weeks_fixed_windows",
    ]

    # act
    df_columns = test_feature.get_output_columns()

    # assert
    # list() keeps the check independent of the concrete sequence type returned.
    assert list(df_columns) == expected_columns