Пример #1
0
def on_demand_feature_view(
    *args,
    features: Optional[List[Feature]] = None,
    sources: Optional[List[Union[BatchFeatureView, StreamFeatureView,
                                 RequestSource,
                                 FeatureViewProjection, ]]] = None,
    inputs: Optional[Dict[str, Union[FeatureView, RequestSource]]] = None,
    schema: Optional[List[Field]] = None,
    description: str = "",
    tags: Optional[Dict[str, str]] = None,
    owner: str = "",
):
    """
    Creates an OnDemandFeatureView object with the given user function as udf.

    This is a decorator factory: it validates and normalizes its arguments, then
    returns a decorator that wraps the user function in an OnDemandFeatureView
    named after that function.

    Args:
        *args (deprecated): Up to two positional values, interpreted as
            ``features`` and ``inputs`` (in that order). When given, they take
            effect after the keyword arguments: the first replaces the schema and
            the second is appended to the sources.
        features (deprecated): The list of features in the output of the on demand
            feature view, after the transformation has been applied. Only used when
            ``schema`` is empty.
        sources (optional): A list of input sources, which may be feature views,
            feature view projections, or request data sources. The caller's list
            is copied, never modified.
        inputs (deprecated): A map from input source names to the actual input sources,
            which may be feature views, feature view projections, or request data sources.
            Mutually exclusive with ``sources``.
        schema (optional): The list of features in the output of the on demand feature
            view, after the transformation has been applied.
        description (optional): A human-readable description.
        tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
        owner (optional): The owner of the on demand feature view, typically the email
            of the primary maintainer.

    Returns:
        A decorator that takes the user function and returns the resulting
        OnDemandFeatureView object.

    Raises:
        ValueError: If both ``sources`` and ``inputs`` are given, if an input is of
            an unsupported type, if more than two positional arguments are passed,
            or if no source ends up being specified.
    """
    positional_attributes = ["features", "inputs"]

    features_deprecation = (
        "The `features` parameter is being deprecated in favor of the `schema` parameter. "
        "Please switch from using `features` to `schema`. This will also requiring switching "
        "feature definitions from using `Feature` to `Field`. Feast 0.21 and onwards will not "
        "support the `features` parameter.")
    inputs_deprecation = (
        "The `inputs` parameter is being deprecated. Please use `sources` instead. "
        "Feast 0.21 and onwards will not support the `inputs` parameter.")

    def _convert_inputs(input_map):
        """Turn a deprecated name -> source mapping into a list of sources."""
        converted = []
        for source in input_map.values():
            if isinstance(source, FeatureView):
                converted.append(feature_view_to_batch_feature_view(source))
            elif isinstance(source, (RequestSource, FeatureViewProjection)):
                converted.append(source)
            else:
                raise ValueError(
                    "input can only accept FeatureView, FeatureViewProjection, or RequestSource"
                )
        return converted

    _schema = schema or []
    if not _schema and features is not None:
        _schema = [Field.from_feature(feature) for feature in features]
    if features is not None:
        warnings.warn(features_deprecation, DeprecationWarning)

    # Work on a copy: sources derived from `inputs` are appended below and must
    # not leak into the list object owned by the caller.
    _sources = list(sources) if sources else []
    if inputs and sources:
        raise ValueError(
            "At most one of `sources` or `inputs` can be specified.")
    elif inputs:
        warnings.warn(inputs_deprecation, DeprecationWarning)
        _sources.extend(_convert_inputs(inputs))

    if args:
        warnings.warn(
            ("On demand feature view parameters should be specified as keyword arguments "
             "instead of positional arguments. Feast 0.23 and onwards will not support "
             "positional arguments in on demand feature view definitions."),
            DeprecationWarning,
        )
        if len(args) > len(positional_attributes):
            raise ValueError(
                f"Only {', '.join(positional_attributes)} are allowed as positional args "
                f"when defining feature views, for backwards compatibility.")

        # `args` is non-empty here, so the first positional value always exists.
        _schema = args[0]
        # Convert Features to Fields.
        if len(_schema) > 0 and isinstance(_schema[0], Feature):
            _schema = [Field.from_feature(feature) for feature in _schema]
        warnings.warn(features_deprecation, DeprecationWarning)

        if len(args) >= 2:
            _sources.extend(_convert_inputs(args[1]))
            # Warn exactly once for the positional `inputs` argument, regardless
            # of how many sources the mapping holds.
            warnings.warn(inputs_deprecation, DeprecationWarning)

    if not _sources:
        raise ValueError("The `sources` parameter must be specified.")

    def decorator(user_function):
        on_demand_feature_view_obj = OnDemandFeatureView(
            name=user_function.__name__,
            sources=_sources,
            schema=_schema,
            udf=user_function,
            description=description,
            tags=tags,
            owner=owner,
        )
        # Carry the function's metadata (name, docstring, ...) over to the view object.
        functools.update_wrapper(wrapper=on_demand_feature_view_obj,
                                 wrapped=user_function)
        return on_demand_feature_view_obj

    return decorator
Пример #2
0
    def __init__(  # noqa: C901
        self,
        *args,
        name: Optional[str] = None,
        features: Optional[List[Feature]] = None,
        sources: Optional[List[Union[BatchFeatureView, StreamFeatureView,
                                     RequestSource,
                                     FeatureViewProjection, ]]] = None,
        udf: Optional[MethodType] = None,
        inputs: Optional[Dict[str, Union[FeatureView, FeatureViewProjection,
                                         RequestSource]]] = None,
        schema: Optional[List[Field]] = None,
        description: str = "",
        tags: Optional[Dict[str, str]] = None,
        owner: str = "",
    ):
        """
        Creates an OnDemandFeatureView object.

        Args:
            *args (deprecated): Up to four positional values, interpreted as
                ``name``, ``features``, ``inputs`` and ``udf`` (in that order).
                When given, they take effect after the keyword arguments.
            name: The unique name of the on demand feature view.
            features (deprecated): The list of features in the output of the on demand
                feature view, after the transformation has been applied. Only used
                when ``schema`` is empty.
            sources (optional): A list of input sources, which may be feature views,
                feature view projections, or request data sources. The caller's
                list is copied, never modified.
            udf (optional): The user defined transformation function, which must take pandas
                dataframes as inputs.
            inputs (optional): (Deprecated) A map from input source names to the actual input sources,
                which may be feature views, feature view projections, or request data sources.
                Mutually exclusive with ``sources``.
            schema (optional): The list of features in the output of the on demand feature
                view, after the transformation has been applied.
            description (optional): A human-readable description.
            tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
            owner (optional): The owner of the on demand feature view, typically the email
                of the primary maintainer.

        Raises:
            ValueError: If both ``sources`` and ``inputs`` are given, if an input is
                of an unsupported type, if too many positional arguments are passed,
                or if the name, the sources, or the udf is missing.
        """
        positional_attributes = ["name", "features", "inputs", "udf"]

        features_deprecation = (
            "The `features` parameter is being deprecated in favor of the `schema` parameter. "
            "Please switch from using `features` to `schema`. This will also requiring switching "
            "feature definitions from using `Feature` to `Field`. Feast 0.21 and onwards will not "
            "support the `features` parameter.")
        inputs_deprecation = (
            "The `inputs` parameter is being deprecated. Please use `sources` instead. "
            "Feast 0.21 and onwards will not support the `inputs` parameter.")

        def _convert_inputs(input_map):
            """Turn a deprecated name -> source mapping into a list of sources."""
            converted = []
            for source in input_map.values():
                if isinstance(source, FeatureView):
                    converted.append(
                        feature_view_to_batch_feature_view(source))
                elif isinstance(source,
                                (RequestSource, FeatureViewProjection)):
                    converted.append(source)
                else:
                    raise ValueError(
                        "input can only accept FeatureView, FeatureViewProjection, or RequestSource"
                    )
            return converted

        _name = name

        _schema = schema or []
        if not _schema and features is not None:
            _schema = [Field.from_feature(feature) for feature in features]
        if features is not None:
            warnings.warn(features_deprecation, DeprecationWarning)

        # Work on a copy: sources derived from `inputs` are appended below and
        # must not leak into the list object owned by the caller.
        _sources = list(sources) if sources else []
        if inputs and sources:
            raise ValueError(
                "At most one of `sources` or `inputs` can be specified.")
        elif inputs:
            warnings.warn(inputs_deprecation, DeprecationWarning)
            _sources.extend(_convert_inputs(inputs))
        _udf = udf

        if args:
            warnings.warn(
                ("On demand feature view parameters should be specified as keyword arguments "
                 "instead of positional arguments. Feast 0.23 and onwards will not support "
                 "positional arguments in on demand feature view definitions."
                 ),
                DeprecationWarning,
            )
            if len(args) > len(positional_attributes):
                raise ValueError(
                    f"Only {', '.join(positional_attributes)} are allowed as positional args "
                    f"when defining feature views, for backwards compatibility."
                )
            # `args` is non-empty here, so the first positional value always exists.
            _name = args[0]
            if len(args) >= 2:
                _schema = args[1]
                # Convert Features to Fields.
                if len(_schema) > 0 and isinstance(_schema[0], Feature):
                    _schema = [
                        Field.from_feature(feature) for feature in _schema
                    ]
                warnings.warn(features_deprecation, DeprecationWarning)
            if len(args) >= 3:
                _sources.extend(_convert_inputs(args[2]))
                warnings.warn(inputs_deprecation, DeprecationWarning)
            if len(args) >= 4:
                _udf = args[3]

        if not _name:
            raise ValueError(
                "The name of the on demand feature view must be specified.")

        if not _sources:
            raise ValueError("The `sources` parameter must be specified.")

        super().__init__(
            name=_name,
            features=_schema,
            description=description,
            tags=tags,
            owner=owner,
        )

        # Index the sources by name: request sources in one map, everything else
        # as a feature view projection in the other.
        self.source_feature_view_projections: Dict[str,
                                                   FeatureViewProjection] = {}
        self.source_request_sources: Dict[str, RequestSource] = {}
        for odfv_source in _sources:
            if isinstance(odfv_source, RequestSource):
                self.source_request_sources[odfv_source.name] = odfv_source
            elif isinstance(odfv_source, FeatureViewProjection):
                self.source_feature_view_projections[
                    odfv_source.name] = odfv_source
            else:
                # Remaining source kinds are feature views; keep only their projection.
                self.source_feature_view_projections[
                    odfv_source.name] = odfv_source.projection

        if _udf is None:
            raise ValueError("The `udf` parameter must be specified.")
        self.udf = _udf
Пример #3
0
    def __init__(
        self,
        *args,
        name: Optional[str] = None,
        entities: Optional[Union[List[Entity], List[str]]] = None,
        ttl: Optional[Union[Duration, timedelta]] = None,
        batch_source: Optional[DataSource] = None,
        stream_source: Optional[DataSource] = None,
        features: Optional[List[Feature]] = None,
        tags: Optional[Dict[str, str]] = None,
        online: bool = True,
        description: str = "",
        owner: str = "",
        schema: Optional[List[Field]] = None,
        source: Optional[DataSource] = None,
    ):
        """
        Creates a FeatureView object.

        Args:
            *args (deprecated): Up to three positional values, interpreted as
                ``name``, ``entities`` and ``ttl`` (in that order). When given,
                they override the corresponding keyword arguments.
            name: The unique name of the feature view.
            entities: The list of entities with which this group of features is associated.
                Entity objects are reduced to their names; when omitted, a single
                dummy entity name is used.
            ttl: The amount of time this group of features lives. A ttl of 0 indicates that
                this group of features lives forever. Note that large ttl's or a ttl of 0
                can result in extremely computationally intensive queries.
            batch_source: The batch source of data where this group of features is stored.
            stream_source (optional): The stream source of data where this group of features
                is stored.
            features (deprecated): The list of features defined as part of this feature view.
                Only used when ``schema`` is empty.
            tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
            online (optional): A boolean indicating whether online retrieval is enabled for
                this feature view.
            description (optional): A human-readable description.
            owner (optional): The owner of the feature view, typically the email of the
                primary maintainer.
            schema (optional): The schema of the feature view, including feature, timestamp,
                and entity columns.
            source (optional): The source of data for this group of features. May be a stream source, or a batch source.
                If a stream source, the source should contain a batch_source for backfills & batch materialization.

        Raises:
            ValueError: If too many positional arguments are passed, if the name is
                missing, if ``ttl`` is neither a Duration, a timedelta nor None, or
                if a field mapping conflicts with an Entity or a Feature.
        """

        positional_attributes = ["name", "entities", "ttl"]

        _name = name
        _entities = entities
        _ttl = ttl

        if args:
            warnings.warn(
                ("feature view parameters should be specified as a keyword argument instead of a positional arg. "
                 "Feast 0.23+ will not support positional arguments to construct feature views"
                 ),
                DeprecationWarning,
            )
            if len(args) > len(positional_attributes):
                raise ValueError(
                    f"Only {', '.join(positional_attributes)} are allowed as positional args when defining "
                    f"feature views, for backwards compatibility.")
            # `args` is non-empty here, so the first positional value always exists.
            _name = args[0]
            if len(args) >= 2:
                _entities = args[1]
            if len(args) >= 3:
                _ttl = args[2]

        if not _name:
            raise ValueError("feature view name needs to be specified")

        self.name = _name
        # Store entities by name only; fall back to the dummy entity when none are given.
        if _entities:
            self.entities = [
                e.name if isinstance(e, Entity) else e for e in _entities
            ]
        else:
            self.entities = [DUMMY_ENTITY_NAME]

        # NOTE(review): presumably this sets self.batch_source (read below) and
        # any stream source — confirm against _initialize_sources.
        self._initialize_sources(_name, batch_source, stream_source, source)

        if isinstance(_ttl, Duration):
            self.ttl = timedelta(seconds=int(_ttl.seconds))
            warnings.warn(
                ("The option to pass a Duration object to the ttl parameter is being deprecated. "
                 "Please pass a timedelta object instead. Feast 0.21 and onwards will not support "
                 "Duration objects."),
                DeprecationWarning,
            )
        elif isinstance(_ttl, timedelta) or _ttl is None:
            self.ttl = _ttl
        else:
            raise ValueError(
                f"unknown value type specified for ttl {type(_ttl)}")

        if features is not None:
            warnings.warn(
                ("The `features` parameter is being deprecated in favor of the `schema` parameter. "
                 "Please switch from using `features` to `schema`. This will also requiring switching "
                 "feature definitions from using `Feature` to `Field`. Feast 0.21 and onwards will not "
                 "support the `features` parameter."),
                DeprecationWarning,
            )

        _schema = schema or []
        if not _schema and features is not None:
            _schema = [Field.from_feature(feature) for feature in features]
        self.schema = _schema

        # TODO(felixwang9817): Infer which fields in the schema are features, timestamps,
        # and entities. For right now we assume that all fields are features, since the
        # current `features` parameter only accepts feature columns.
        _features = _schema

        # An entity or feature name must not also be a key of the batch source's
        # field mapping. The mapping is looked up once rather than per column.
        cols = list(self.entities) + [field.name for field in _features]
        field_mapping = self.batch_source.field_mapping
        if field_mapping is not None:
            for col in cols:
                if col in field_mapping:
                    raise ValueError(
                        f"The field {col} is mapped to {field_mapping[col]} for this data source. "
                        f"Please either remove this field mapping or use {field_mapping[col]} as the "
                        f"Entity or Feature name.")

        super().__init__(
            name=_name,
            features=_features,
            description=description,
            tags=tags,
            owner=owner,
        )
        self.online = online
        self.materialization_intervals = []