Example #1
def test_create_stream_feature_view():
    """A StreamFeatureView requires a stream source: construction succeeds with a
    KafkaSource, and raises ValueError when the source is missing or batch-only."""
    kafka_source = KafkaSource(
        name="kafka",
        event_timestamp_column="",
        bootstrap_servers="",
        message_format=AvroFormat(""),
        topic="topic",
        batch_source=FileSource(path="some path"),
    )

    # Valid: a proper stream source is supplied.
    StreamFeatureView(
        name="test batch feature view",
        entities=[],
        ttl=timedelta(days=30),
        source=kafka_source,
    )

    # Invalid: no source at all.
    with pytest.raises(ValueError):
        StreamFeatureView(
            name="test batch feature view",
            entities=[],
            ttl=timedelta(days=30),
        )

    # Invalid: a batch-only FileSource is not a stream source.
    with pytest.raises(ValueError):
        StreamFeatureView(
            name="test batch feature view",
            entities=[],
            ttl=timedelta(days=30),
            source=FileSource(path="some path"),
        )
Example #2
def test_infer_odfv_list_features(environment, infer_features, tmp_path):
    """After applying an ODFV built over list-valued embedding columns, the
    registered view exposes exactly two features."""
    fake_embedding = [1.0, 1.0]
    # One-row frame with two list-typed embedding columns plus the timestamp columns.
    items_df = pd.DataFrame(
        {
            "item_id": [0],
            "embedding_float": [fake_embedding],
            "embedding_double": [fake_embedding],
            "event_timestamp": [pd.Timestamp(datetime.utcnow())],
            "created": [pd.Timestamp(datetime.utcnow())],
        }
    )
    parquet_path = f"{tmp_path}/items.parquet"
    items_df.to_parquet(parquet_path)

    items_source = FileSource(
        path=parquet_path,
        event_timestamp_column="event_timestamp",
        created_timestamp_column="created",
    )
    items = create_item_embeddings_feature_view(items_source)
    sim_odfv = similarity_feature_view(
        {"items": items, "input_request": create_similarity_request_data_source()},
        infer_features=infer_features,
    )

    store = environment.feature_store
    store.apply([item(), items, sim_odfv])

    registered = store.get_on_demand_feature_view("similarity")
    assert len(registered.features) == 2
Example #3
def test_feature_view_kw_args_normal():
    """Selecting a subset of features via FeatureView indexing works inside a
    FeatureService definition."""
    source = FileSource(name="my-file-source", path="test.parquet")
    view = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=[
            Field(name="feature1", dtype=Float32),
            Field(name="feature2", dtype=Float32),
        ],
        source=source,
    )
    # FeatureView.__getitem__ with a list of names yields a projection usable here.
    selected = view[["feature1", "feature2"]]
    _ = FeatureService(name="my-feature-service", features=[selected])
Example #4
    def from_proto(data_source: DataSourceProto) -> Any:
        """
        Converts data source config in protobuf spec to a DataSource class object.

        Args:
            data_source: A protobuf representation of a DataSource.

        Returns:
            A DataSource class object.

        Raises:
            ValueError: The type of DataSource could not be identified.
        """
        # An explicit class type on the proto takes precedence over sniffing
        # the populated option fields below.
        if data_source.data_source_class_type:
            cls = get_data_source_class_from_type(data_source.data_source_class_type)
            return cls.from_proto(data_source)

        # Imports for the offline-store sources are deferred to avoid import
        # cycles; each branch returns as soon as a source type is identified.
        file_opts = data_source.file_options
        if file_opts.file_format and file_opts.file_url:
            from feast.infra.offline_stores.file_source import FileSource

            return FileSource.from_proto(data_source)

        bq_opts = data_source.bigquery_options
        if bq_opts.table_ref or bq_opts.query:
            from feast.infra.offline_stores.bigquery_source import BigQuerySource

            return BigQuerySource.from_proto(data_source)

        redshift_opts = data_source.redshift_options
        if redshift_opts.table or redshift_opts.query:
            from feast.infra.offline_stores.redshift_source import RedshiftSource

            return RedshiftSource.from_proto(data_source)

        kafka_opts = data_source.kafka_options
        if (
            kafka_opts.bootstrap_servers
            and kafka_opts.topic
            and kafka_opts.message_format
        ):
            return KafkaSource.from_proto(data_source)

        kinesis_opts = data_source.kinesis_options
        if (
            kinesis_opts.record_format
            and kinesis_opts.region
            and kinesis_opts.stream_name
        ):
            return KinesisSource.from_proto(data_source)

        raise ValueError("Could not identify the source type being added.")
Example #5
def test_hash():
    """FeatureView hashing: views with identical definitions collide in a set,
    while differing schemas or descriptions produce distinct hashes."""
    file_source = FileSource(name="my-file-source", path="test.parquet")

    def make_view(schema, **extra):
        # Factory so every view shares name/entities/source and differs only
        # in the arguments under test. Schema lists are built fresh per call.
        return FeatureView(
            name="my-feature-view",
            entities=[],
            schema=schema,
            source=file_source,
            **extra,
        )

    def two_field_schema():
        return [
            Field(name="feature1", dtype=Float32),
            Field(name="feature2", dtype=Float32),
        ]

    feature_view_1 = make_view(two_field_schema())
    feature_view_2 = make_view(two_field_schema())
    feature_view_3 = make_view([Field(name="feature1", dtype=Float32)])
    feature_view_4 = make_view(
        [Field(name="feature1", dtype=Float32)], description="test"
    )

    # Identical definitions hash equal.
    assert len({feature_view_1, feature_view_2}) == 1
    # Different schema changes the hash.
    assert len({feature_view_1, feature_view_3}) == 2
    # Different description changes the hash.
    assert len({feature_view_3, feature_view_4}) == 2
    # All four collapse to three distinct hashes.
    assert len({feature_view_1, feature_view_2, feature_view_3, feature_view_4}) == 3
Example #6
    def _localize_feature_view(self, feature_view: FeatureView):
        """
        This function ensures that the `FeatureView` object points to files in the local disk
        """
        batch_source = feature_view.batch_source
        # Only file-backed sources need localizing; anything else is left as-is.
        if not isinstance(batch_source, FileSource):
            return

        # Download the (possibly remote) parquet data into a fresh local path.
        file_access = FlyteContext.current_context().file_access
        local_path = file_access.get_random_local_path(batch_source.path)
        file_access.get_data(
            batch_source.path,
            local_path,
            is_multipart=True,
        )
        # Repoint the view's batch source at the local copy, preserving the
        # event timestamp column of the original source.
        feature_view.batch_source = FileSource(
            path=local_path,
            event_timestamp_column=batch_source.event_timestamp_column,
        )
Example #7
def test_hash():
    """OnDemandFeatureView hashing: identical definitions collide in a set,
    while a different udf or description yields a distinct hash."""
    file_source = FileSource(name="my-file-source", path="test.parquet")
    base_view = FeatureView(
        name="my-feature-view",
        entities=[],
        schema=[
            Field(name="feature1", dtype=Float32),
            Field(name="feature2", dtype=Float32),
        ],
        source=file_source,
    )
    sources = [base_view]

    def make_odfv(udf, **extra):
        # Factory so every ODFV shares name/sources/schema and differs only
        # in the udf and any extra kwargs under test.
        return OnDemandFeatureView(
            name="my-on-demand-feature-view",
            sources=sources,
            schema=[
                Field(name="output1", dtype=Float32),
                Field(name="output2", dtype=Float32),
            ],
            udf=udf,
            **extra,
        )

    on_demand_feature_view_1 = make_odfv(udf1)
    on_demand_feature_view_2 = make_odfv(udf1)
    on_demand_feature_view_3 = make_odfv(udf2)
    on_demand_feature_view_4 = make_odfv(udf2, description="test")

    # Identical definitions hash equal.
    assert len({on_demand_feature_view_1, on_demand_feature_view_2}) == 1
    # Different udf changes the hash.
    assert len({on_demand_feature_view_1, on_demand_feature_view_3}) == 2
    # Different description changes the hash.
    assert len({on_demand_feature_view_3, on_demand_feature_view_4}) == 2
    # All four collapse to three distinct hashes.
    assert (
        len(
            {
                on_demand_feature_view_1,
                on_demand_feature_view_2,
                on_demand_feature_view_3,
                on_demand_feature_view_4,
            }
        )
        == 3
    )