Example #1
def _write_minibatch(
    client,
    project: str,
    table: Union[FeatureTable, FeatureView],
    data: Sequence[
        Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
    ],
    progress: Optional[Callable[[int], Any]],
):
    entities = []
    for entity_key, features, timestamp, created_ts in data:
        document_id = compute_datastore_entity_id(entity_key)

        key = client.key("Project", project, "Table", table.name, "Row", document_id,)

        entity = datastore.Entity(
            key=key, exclude_from_indexes=("created_ts", "event_ts", "values")
        )

        entity.update(
            dict(
                key=entity_key.SerializeToString(),
                values={k: v.SerializeToString() for k, v in features.items()},
                event_ts=utils.make_tzaware(timestamp),
                created_ts=(
                    utils.make_tzaware(created_ts) if created_ts is not None else None
                ),
            )
        )
        entities.append(entity)
    with client.transaction():
        client.put_multi(entities)

    if progress:
        progress(len(entities))
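Every example on this page leans on utils.make_tzaware. A minimal standalone sketch of what such a helper typically does, assuming Feast's convention of treating naive datetimes as UTC (the behavior is inferred from these excerpts, not verified against any particular release):

from datetime import datetime, timezone

def make_tzaware(ts: datetime) -> datetime:
    # Naive datetimes are assumed to be UTC; aware ones pass through unchanged.
    if ts.tzinfo is None:
        return ts.replace(tzinfo=timezone.utc)
    return ts

assert make_tzaware(datetime(2021, 7, 16, 19, 20)).tzinfo is timezone.utc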
Example #2
    def from_proto(cls, feature_view_proto: FeatureViewProto):
        """
        Creates a feature view from a protobuf representation of a feature view.

        Args:
            feature_view_proto: A protobuf representation of a feature view.

        Returns:
            A FeatureView object based on the feature view protobuf.
        """
        batch_source = DataSource.from_proto(feature_view_proto.spec.batch_source)
        stream_source = (
            DataSource.from_proto(feature_view_proto.spec.stream_source)
            if feature_view_proto.spec.HasField("stream_source")
            else None
        )
        feature_view = cls(
            name=feature_view_proto.spec.name,
            entities=[entity for entity in feature_view_proto.spec.entities],
            features=[
                Feature(
                    name=feature.name,
                    dtype=ValueType(feature.value_type),
                    labels=dict(feature.labels),
                )
                for feature in feature_view_proto.spec.features
            ],
            tags=dict(feature_view_proto.spec.tags),
            online=feature_view_proto.spec.online,
            ttl=(
                None
                if feature_view_proto.spec.ttl.seconds == 0
                and feature_view_proto.spec.ttl.nanos == 0
                else feature_view_proto.spec.ttl
            ),
            batch_source=batch_source,
            stream_source=stream_source,
        )

        if feature_view_proto.meta.HasField("created_timestamp"):
            feature_view.created_timestamp = (
                feature_view_proto.meta.created_timestamp.ToDatetime()
            )
        if feature_view_proto.meta.HasField("last_updated_timestamp"):
            feature_view.last_updated_timestamp = (
                feature_view_proto.meta.last_updated_timestamp.ToDatetime()
            )

        for interval in feature_view_proto.meta.materialization_intervals:
            feature_view.materialization_intervals.append(
                (
                    utils.make_tzaware(interval.start_time.ToDatetime()),
                    utils.make_tzaware(interval.end_time.ToDatetime()),
                )
            )

        return feature_view
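The ttl branch above treats a protobuf Duration of zero seconds and zero nanos as "no ttl". A small self-contained illustration of that convention (assumes only the protobuf package):

from datetime import timedelta
from google.protobuf.duration_pb2 import Duration

ttl = Duration()
# A default-constructed Duration is zero, which the code above maps to ttl=None.
assert ttl.seconds == 0 and ttl.nanos == 0

ttl.FromTimedelta(timedelta(days=1))
# Example #4 below converts a non-zero Duration with ToTimedelta() instead.
assert ttl.ToTimedelta() == timedelta(days=1)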
Example #3
    def materialize(
        self,
        start_date: datetime,
        end_date: datetime,
        feature_views: Optional[List[str]] = None,
    ) -> None:
        """
        Materialize data from the offline store into the online store.

        This method loads feature data in the specified interval from either
        the specified feature views, or all feature views if none are specified,
        into the online store where it is available for online serving.

        Args:
            start_date (datetime): Start date for time range of data to materialize into the online store
            end_date (datetime): End date for time range of data to materialize into the online store
            feature_views (List[str]): Optional list of feature view names. If selected, will only run
                materialization for the specified feature views.

        Examples:
            Materialize all features into the online store over the interval
            from 3 hours ago to 10 minutes ago.
            >>> from datetime import datetime, timedelta
            >>> from feast.feature_store import FeatureStore
            >>> from feast.repo_config import RepoConfig
            >>>
            >>> fs = FeatureStore(config=RepoConfig(provider="gcp"))
            >>> fs.materialize(
            ...     start_date=datetime.utcnow() - timedelta(hours=3), end_date=datetime.utcnow() - timedelta(minutes=10)
            ... )
        """
        self._tele.log("materialize")

        if utils.make_tzaware(start_date) > utils.make_tzaware(end_date):
            raise ValueError(
                f"The given start_date {start_date} is greater than the given end_date {end_date}."
            )

        feature_views_to_materialize = []
        if feature_views is None:
            feature_views_to_materialize = self._registry.list_feature_views(
                self.config.project
            )
        else:
            for name in feature_views:
                feature_view = self._registry.get_feature_view(
                    name, self.config.project
                )
                feature_views_to_materialize.append(feature_view)

        # TODO paging large loads
        for feature_view in feature_views_to_materialize:
            provider = self._get_provider()
            _print_materialization_log(start_date, end_date, feature_view)
            provider.materialize_single_feature_view(
                feature_view, start_date, end_date, self._registry, self.project
            )
            print(" done!")
Example #4
    def from_proto(cls, feature_view_proto: FeatureViewProto):
        """
        Creates a feature view from a protobuf representation of a feature view.

        Args:
            feature_view_proto: A protobuf representation of a feature view.

        Returns:
            A FeatureView object based on the feature view protobuf.
        """
        batch_source = DataSource.from_proto(
            feature_view_proto.spec.batch_source)
        stream_source = (
            DataSource.from_proto(feature_view_proto.spec.stream_source)
            if feature_view_proto.spec.HasField("stream_source") else None)
        feature_view = cls(
            name=feature_view_proto.spec.name,
            entities=[entity for entity in feature_view_proto.spec.entities],
            schema=[
                Field.from_proto(field_proto)
                for field_proto in feature_view_proto.spec.features
            ],
            description=feature_view_proto.spec.description,
            tags=dict(feature_view_proto.spec.tags),
            owner=feature_view_proto.spec.owner,
            online=feature_view_proto.spec.online,
            ttl=(
                timedelta(days=0)
                if feature_view_proto.spec.ttl.ToNanoseconds() == 0
                else feature_view_proto.spec.ttl.ToTimedelta()
            ),
            source=batch_source,
        )
        if stream_source:
            feature_view.stream_source = stream_source

        # FeatureViewProjections are not saved in the FeatureView proto.
        # Create the default projection.
        feature_view.projection = FeatureViewProjection.from_definition(
            feature_view)

        if feature_view_proto.meta.HasField("created_timestamp"):
            feature_view.created_timestamp = (
                feature_view_proto.meta.created_timestamp.ToDatetime())
        if feature_view_proto.meta.HasField("last_updated_timestamp"):
            feature_view.last_updated_timestamp = (
                feature_view_proto.meta.last_updated_timestamp.ToDatetime())

        for interval in feature_view_proto.meta.materialization_intervals:
            feature_view.materialization_intervals.append((
                utils.make_tzaware(interval.start_time.ToDatetime()),
                utils.make_tzaware(interval.end_time.ToDatetime()),
            ))

        return feature_view
Example #5
    def online_write_batch(
        self,
        project: str,
        table: Union[FeatureTable, FeatureView],
        data: List[
            Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
        ],
        progress: Optional[Callable[[int], Any]],
    ) -> None:
        client = self._get_client()

        feature_view = table.name

        ex = Timestamp()
        ex.seconds = EX_SECONDS
        ex_str = ex.SerializeToString()

        for entity_key, values, timestamp, created_ts in data:
            # Start a fresh mapping per entity so features written for one row
            # cannot leak into the next row's HSET.
            entity_hset = {}
            redis_key_bin = _redis_key(project, entity_key)
            ts = Timestamp()
            ts.seconds = int(utils.make_tzaware(timestamp).timestamp())
            entity_hset[f"_ts:{feature_view}"] = ts.SerializeToString()
            entity_hset[f"_ex:{feature_view}"] = ex_str

            for feature_name, val in values.items():
                f_key = _mmh3(f"{feature_view}:{feature_name}")
                entity_hset[f_key] = val.SerializeToString()

            client.hset(redis_key_bin, mapping=entity_hset)
            if progress:
                progress(1)
Example #6
    def online_write_batch(
        self,
        config: RepoConfig,
        table: FeatureView,
        data: List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime,
                         Optional[datetime]]],
        progress: Optional[Callable[[int], Any]],
    ) -> None:
        online_config = config.online_store
        assert isinstance(online_config, DynamoDBOnlineStoreConfig)
        dynamodb_resource = self._get_dynamodb_resource(online_config.region)

        table_instance = dynamodb_resource.Table(_get_table_name(
            config, table))
        with table_instance.batch_writer() as batch:
            for entity_key, features, timestamp, created_ts in data:
                entity_id = compute_entity_id(entity_key)
                batch.put_item(
                    Item={
                        "entity_id": entity_id,  # PartitionKey
                        "event_ts": str(utils.make_tzaware(timestamp)),
                        "values": {
                            k: v.SerializeToString()
                            for k, v in features.items()  # Serialized Features
                        },
                    })
                if progress:
                    progress(1)
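Note that event_ts is stored as the string form of a timezone-aware datetime. A quick sketch of what that value looks like, assuming the naive-means-UTC behavior of make_tzaware:

from datetime import datetime, timezone

event_ts = str(datetime(2021, 7, 16, 19, 20, 1, tzinfo=timezone.utc))
print(event_ts)  # 2021-07-16 19:20:01+00:00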
Example #7
    def materialize_single_feature_view(
        self,
        feature_view: FeatureView,
        start_date: datetime,
        end_date: datetime,
        registry: Registry,
        project: str,
        tqdm_builder: Callable[[int], tqdm],
    ) -> None:
        entities = []
        for entity_name in feature_view.entities:
            entities.append(registry.get_entity(entity_name, project))

        (
            join_key_columns,
            feature_name_columns,
            event_timestamp_column,
            created_timestamp_column,
        ) = _get_column_names(feature_view, entities)

        start_date = utils.make_tzaware(start_date)
        end_date = utils.make_tzaware(end_date)

        table = self.offline_store.pull_latest_from_table_or_query(
            data_source=feature_view.input,
            join_key_columns=join_key_columns,
            feature_name_columns=feature_name_columns,
            event_timestamp_column=event_timestamp_column,
            created_timestamp_column=created_timestamp_column,
            start_date=start_date,
            end_date=end_date,
        )

        if feature_view.input.field_mapping is not None:
            table = _run_field_mapping(table, feature_view.input.field_mapping)

        join_keys = [entity.join_key for entity in entities]
        rows_to_write = _convert_arrow_to_proto(table, feature_view, join_keys)

        with tqdm_builder(len(rows_to_write)) as pbar:
            self.online_write_batch(
                project, feature_view, rows_to_write, lambda x: pbar.update(x)
            )

        feature_view.materialization_intervals.append((start_date, end_date))
        registry.apply_feature_view(feature_view, project)
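The tqdm_builder parameter above expects a callable that turns a row count into a progress bar. The materialize() examples later on this page (#17 and #19) build it inline; extracted as a standalone helper it is simply:

from tqdm import tqdm

def tqdm_builder(length: int) -> tqdm:
    # One bar per feature view, sized to the number of rows to write.
    return tqdm(total=length, ncols=100)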
Example #8
    def materialize_single_feature_view(
        self,
        feature_view: FeatureView,
        start_date: datetime,
        end_date: datetime,
        registry: Registry,
        project: str,
    ) -> None:
        assert isinstance(feature_view.input, BigQuerySource)

        entities = []
        for entity_name in feature_view.entities:
            entities.append(registry.get_entity(entity_name, project))

        (
            join_key_columns,
            feature_name_columns,
            event_timestamp_column,
            created_timestamp_column,
        ) = _get_column_names(feature_view, entities)

        start_date = utils.make_tzaware(start_date)
        end_date = utils.make_tzaware(end_date)

        offline_store = get_offline_store_from_sources([feature_view.input])
        table = offline_store.pull_latest_from_table_or_query(
            data_source=feature_view.input,
            join_key_columns=join_key_columns,
            feature_name_columns=feature_name_columns,
            event_timestamp_column=event_timestamp_column,
            created_timestamp_column=created_timestamp_column,
            start_date=start_date,
            end_date=end_date,
        )

        if feature_view.input.field_mapping is not None:
            table = _run_field_mapping(table, feature_view.input.field_mapping)

        join_keys = [entity.join_key for entity in entities]
        rows_to_write = _convert_arrow_to_proto(table, feature_view, join_keys)

        self.online_write_batch(project, feature_view, rows_to_write, None)

        feature_view.materialization_intervals.append((start_date, end_date))
        registry.apply_feature_view(feature_view, project)
Example #9
def materialize_command(ctx: click.Context, start_ts: str, end_ts: str,
                        views: List[str]):
    """
    Run a (non-incremental) materialization job to ingest data into the online store. Feast
    will read all data between START_TS and END_TS from the offline store and write it to the
    online store. If you don't specify feature view names using --views, all registered Feature
    Views will be materialized.

    START_TS and END_TS should be in ISO 8601 format, e.g. '2021-07-16T19:20:01'
    """
    repo = ctx.obj["CHDIR"]
    cli_check_repo(repo)
    store = FeatureStore(repo_path=str(repo))
    store.materialize(
        feature_views=None if not views else views,
        start_date=utils.make_tzaware(datetime.fromisoformat(start_ts)),
        end_date=utils.make_tzaware(datetime.fromisoformat(end_ts)),
    )
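The command parses its bounds with datetime.fromisoformat before making them timezone-aware. Sketched standalone (the UTC fallback for offset-free strings is an assumption consistent with make_tzaware as used above):

from datetime import datetime, timezone

start = datetime.fromisoformat("2021-07-16T19:20:01")
assert start.tzinfo is None  # no offset in the string, so the result is naive
start = start.replace(tzinfo=timezone.utc)  # what make_tzaware would then do
print(start.isoformat())  # 2021-07-16T19:20:01+00:00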
Example #10
    def from_proto(cls, feature_view_proto: FeatureViewProto):
        """
        Creates a feature view from a protobuf representation of a feature view.

        Args:
            feature_view_proto: A protobuf representation of a feature view.

        Returns:
            A FeatureView object based on the feature view protobuf.
        """

        feature_view = cls(
            name=feature_view_proto.spec.name,
            entities=[entity for entity in feature_view_proto.spec.entities],
            features=[
                Feature(
                    name=feature.name,
                    dtype=ValueType(feature.value_type),
                    labels=feature.labels,
                ) for feature in feature_view_proto.spec.features
            ],
            tags=dict(feature_view_proto.spec.tags),
            online=feature_view_proto.spec.online,
            ttl=(None if feature_view_proto.spec.ttl.seconds == 0
                 and feature_view_proto.spec.ttl.nanos == 0 else
                 feature_view_proto.spec.ttl),
            input=DataSource.from_proto(feature_view_proto.spec.input),
        )

        feature_view.created_timestamp = feature_view_proto.meta.created_timestamp

        for interval in feature_view_proto.meta.materialization_intervals:
            feature_view.materialization_intervals.append((
                utils.make_tzaware(interval.start_time.ToDatetime()),
                utils.make_tzaware(interval.end_time.ToDatetime()),
            ))

        return feature_view
Example #11
def _insert_data_test_table(data, project, tbl_name, region):
    dynamodb_resource = boto3.resource("dynamodb", region_name=region)
    table_instance = dynamodb_resource.Table(f"{project}.{tbl_name}")
    # Open the batch writer once so puts are actually batched, rather than
    # flushing a single-item batch for every record.
    with table_instance.batch_writer() as batch:
        for entity_key, features, timestamp, created_ts in data:
            entity_id = compute_entity_id(entity_key)
            batch.put_item(
                Item={
                    "entity_id": entity_id,
                    "event_ts": str(utils.make_tzaware(timestamp)),
                    "values": {
                        k: v.SerializeToString() for k, v in features.items()
                    },
                }
            )
Example #12
    def retrieve_saved_dataset(self, config: RepoConfig,
                               dataset: SavedDataset) -> RetrievalJob:
        set_usage_attribute("provider", self.__class__.__name__)

        feature_name_columns = [
            ref.replace(":", "__")
            if dataset.full_feature_names else ref.split(":")[1]
            for ref in dataset.features
        ]

        # ToDo: replace hardcoded value
        event_ts_column = "event_timestamp"

        return self.offline_store.pull_all_from_table_or_query(
            config=config,
            data_source=dataset.storage.to_data_source(),
            join_key_columns=dataset.join_keys,
            feature_name_columns=feature_name_columns,
            event_timestamp_column=event_ts_column,
            start_date=make_tzaware(
                dataset.min_event_timestamp),  # type: ignore
            end_date=make_tzaware(dataset.max_event_timestamp +
                                  timedelta(seconds=1)),  # type: ignore
        )
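The list comprehension above maps feature references of the form "view:feature" to output column names. In isolation (the names are illustrative, not from the source):

ref = "driver_stats:conv_rate"
assert ref.replace(":", "__") == "driver_stats__conv_rate"  # full_feature_names=True
assert ref.split(":")[1] == "conv_rate"                     # full_feature_names=False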
Example #13
    def _materialize_single_feature_view(self, feature_view: FeatureView,
                                         start_date: datetime,
                                         end_date: datetime) -> None:
        (
            entity_names,
            feature_names,
            event_timestamp_column,
            created_timestamp_column,
        ) = _run_reverse_field_mapping(feature_view)

        start_date = utils.make_tzaware(start_date)
        end_date = utils.make_tzaware(end_date)

        provider = self._get_provider()
        table = provider.pull_latest_from_table_or_query(
            feature_view.input,
            entity_names,
            feature_names,
            event_timestamp_column,
            created_timestamp_column,
            start_date,
            end_date,
        )

        if feature_view.input.field_mapping is not None:
            table = _run_forward_field_mapping(
                table, feature_view.input.field_mapping)

        rows_to_write = _convert_arrow_to_proto(table, feature_view)

        provider = self._get_provider()
        provider.online_write_batch(self.config.project, feature_view,
                                    rows_to_write, None)

        feature_view.materialization_intervals.append((start_date, end_date))
        self.apply([feature_view])
Example #14
 def _write_batch_non_duplicates(
     self,
     table_instance,
     data: List[
         Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
     ],
     progress: Optional[Callable[[int], Any]],
 ):
     """Deduplicate write batch request items on ``entity_id`` primary key."""
     with table_instance.batch_writer(overwrite_by_pkeys=["entity_id"]) as batch:
         for entity_key, features, timestamp, created_ts in data:
             entity_id = compute_entity_id(entity_key)
             batch.put_item(
                 Item={
                     "entity_id": entity_id,  # PartitionKey
                     "event_ts": str(utils.make_tzaware(timestamp)),
                     "values": {
                         k: v.SerializeToString()
                         for k, v in features.items()  # Serialized Features
                     },
                 }
             )
             if progress:
                 progress(1)
Example #15
def _write_minibatch(
    client,
    project: str,
    table: Union[FeatureTable, FeatureView],
    data: Sequence[
        Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
    ],
    progress: Optional[Callable[[int], Any]],
):
    from google.api_core.exceptions import Conflict
    from google.cloud import datastore

    num_retries_on_conflict = 3
    row_count = 0
    for retry_number in range(num_retries_on_conflict):
        try:
            row_count = 0
            with client.transaction():
                for entity_key, features, timestamp, created_ts in data:
                    document_id = compute_datastore_entity_id(entity_key)

                    key = client.key(
                        "Project", project, "Table", table.name, "Row", document_id,
                    )

                    entity = client.get(key)
                    if entity is not None:
                        if entity["event_ts"] > utils.make_tzaware(timestamp):
                            # Do not overwrite feature values computed from fresher data
                            continue
                        elif (
                            entity["event_ts"] == utils.make_tzaware(timestamp)
                            and created_ts is not None
                            and entity["created_ts"] is not None
                            and entity["created_ts"] > utils.make_tzaware(created_ts)
                        ):
                            # Do not overwrite feature values computed from the same data, but
                            # computed later than this one
                            continue
                    else:
                        entity = datastore.Entity(key=key)

                    entity.update(
                        dict(
                            key=entity_key.SerializeToString(),
                            values={
                                k: v.SerializeToString() for k, v in features.items()
                            },
                            event_ts=utils.make_tzaware(timestamp),
                            created_ts=(
                                utils.make_tzaware(created_ts)
                                if created_ts is not None
                                else None
                            ),
                        )
                    )
                    client.put(entity)
                    row_count += 1

                    if progress:
                        progress(1)
            break  # make sure to break out of retry loop if all went well
        except Conflict:
            if retry_number == num_retries_on_conflict - 1:
                raise
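The skip conditions inside the transaction are the interesting part: never overwrite a row computed from fresher source data, nor one computed from the same data at a later time. The same guard in miniature:

from datetime import datetime, timezone
from typing import Optional

def should_skip(
    stored_event_ts: datetime,
    stored_created_ts: Optional[datetime],
    new_event_ts: datetime,
    new_created_ts: Optional[datetime],
) -> bool:
    if stored_event_ts > new_event_ts:
        return True  # stored row was computed from fresher data
    return (
        stored_event_ts == new_event_ts
        and new_created_ts is not None
        and stored_created_ts is not None
        and stored_created_ts > new_created_ts
    )  # same data, but the stored row was computed later than this one

t0 = datetime(2021, 7, 16, tzinfo=timezone.utc)
t1 = datetime(2021, 7, 17, tzinfo=timezone.utc)
assert should_skip(t1, None, t0, None)      # stored row is fresher: skip
assert not should_skip(t0, None, t1, None)  # incoming row is fresher: write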
Example #16
    def online_write_batch(
        self,
        config: RepoConfig,
        table: FeatureView,
        data: List[
            Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
        ],
        progress: Optional[Callable[[int], Any]],
    ) -> None:
        online_store_config = config.online_store
        assert isinstance(online_store_config, RedisOnlineStoreConfig)

        client = self._get_client(online_store_config)
        project = config.project

        feature_view = table.name
        ts_key = f"_ts:{feature_view}"
        keys = []
        # redis pipelining optimization: send multiple commands to redis server without waiting for every reply
        with client.pipeline() as pipe:
            # check if a previous record under the key bin exists
            # TODO: investigate if check and set is a better approach rather than pulling all entity ts and then setting
            # it may be significantly slower but avoids potential (rare) race conditions
            for entity_key, _, _, _ in data:
                redis_key_bin = _redis_key(project, entity_key)
                keys.append(redis_key_bin)
                pipe.hmget(redis_key_bin, ts_key)
            prev_event_timestamps = pipe.execute()
            # flatten the list of lists: `hmget` returns a one-element list per
            # key, since it can fetch multiple fields from each key bin
            prev_event_timestamps = [i[0] for i in prev_event_timestamps]

            for redis_key_bin, prev_event_time, (_, values, timestamp, _) in zip(
                keys, prev_event_timestamps, data
            ):
                event_time_seconds = int(utils.make_tzaware(timestamp).timestamp())

                # ignore if event_timestamp is before the event features that are currently in the feature store
                if prev_event_time:
                    prev_ts = Timestamp()
                    prev_ts.ParseFromString(prev_event_time)
                    if prev_ts.seconds and event_time_seconds <= prev_ts.seconds:
                        # TODO: somehow signal that it's not overwriting the current record?
                        if progress:
                            progress(1)
                        continue

                ts = Timestamp()
                ts.seconds = event_time_seconds
                entity_hset = dict()
                entity_hset[ts_key] = ts.SerializeToString()

                for feature_name, val in values.items():
                    f_key = _mmh3(f"{feature_view}:{feature_name}")
                    entity_hset[f_key] = val.SerializeToString()

                pipe.hset(redis_key_bin, mapping=entity_hset)
                # TODO: support expiring the entity / features in Redis
                # otherwise entity features remain in redis until cleaned up in separate process
                # client.expire redis_key_bin based a ttl setting
            results = pipe.execute()
            if progress:
                progress(len(results))
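Both Redis examples store the event time as a serialized protobuf Timestamp under the "_ts:<feature_view>" hash field, and this version parses it back to compare freshness. The round trip in isolation (assumes only the protobuf package):

from datetime import datetime, timezone
from google.protobuf.timestamp_pb2 import Timestamp

ts = Timestamp()
ts.seconds = int(datetime(2021, 7, 16, 19, 20, tzinfo=timezone.utc).timestamp())
payload = ts.SerializeToString()

prev_ts = Timestamp()
prev_ts.ParseFromString(payload)
assert prev_ts.seconds == ts.seconds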
Example #17
    def materialize(
        self,
        start_date: datetime,
        end_date: datetime,
        feature_views: Optional[List[str]] = None,
    ) -> None:
        """
        Materialize data from the offline store into the online store.

        This method loads feature data in the specified interval from either
        the specified feature views, or all feature views if none are specified,
        into the online store where it is available for online serving.

        Args:
            start_date (datetime): Start date for time range of data to materialize into the online store
            end_date (datetime): End date for time range of data to materialize into the online store
            feature_views (List[str]): Optional list of feature view names. If selected, will only run
                materialization for the specified feature views.

        Examples:
            Materialize all features into the online store over the interval
            from 3 hours ago to 10 minutes ago.

            >>> from feast import FeatureStore, RepoConfig
            >>> from datetime import datetime, timedelta
            >>> fs = FeatureStore(repo_path="feature_repo")
            >>> fs.materialize(
            ...     start_date=datetime.utcnow() - timedelta(hours=3), end_date=datetime.utcnow() - timedelta(minutes=10)
            ... )
            Materializing...
            <BLANKLINE>
            ...
        """
        if utils.make_tzaware(start_date) > utils.make_tzaware(end_date):
            raise ValueError(
                f"The given start_date {start_date} is greater than the given end_date {end_date}."
            )

        feature_views_to_materialize = []
        if feature_views is None:
            feature_views_to_materialize = self._registry.list_feature_views(
                self.project)
        else:
            for name in feature_views:
                feature_view = self._registry.get_feature_view(
                    name, self.project)
                feature_views_to_materialize.append(feature_view)

        _print_materialization_log(
            start_date,
            end_date,
            len(feature_views_to_materialize),
            self.config.online_store.type,
        )
        # TODO paging large loads
        for feature_view in feature_views_to_materialize:
            provider = self._get_provider()
            print(
                f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}:"
            )

            def tqdm_builder(length):
                return tqdm(total=length, ncols=100)

            start_date = utils.make_tzaware(start_date)
            end_date = utils.make_tzaware(end_date)

            provider.materialize_single_feature_view(
                config=self.config,
                feature_view=feature_view,
                start_date=start_date,
                end_date=end_date,
                registry=self._registry,
                project=self.project,
                tqdm_builder=tqdm_builder,
            )

            self._registry.apply_materialization(feature_view, self.project,
                                                 start_date, end_date)
Example #18
def convert_timestamp_records_to_utc(records: List[Dict[str, Any]],
                                     column: str) -> List[Dict[str, Any]]:
    for record in records:
        record[column] = utils.make_tzaware(record[column]).astimezone(utc)
    return records
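A hypothetical usage run, assuming make_tzaware treats naive datetimes as UTC: every value in the column comes out as an aware UTC datetime, whether it started naive or in another zone.

from datetime import datetime, timedelta, timezone

records = [
    {"event_ts": datetime(2021, 7, 16, 19, 20)},                  # naive
    {"event_ts": datetime(2021, 7, 16, 21, 20,
                          tzinfo=timezone(timedelta(hours=2)))},  # UTC+2
]
# convert_timestamp_records_to_utc(records, "event_ts") would leave both rows
# at 19:20 UTC: the naive one is tagged as UTC, the aware one is converted.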
Example #19
    def materialize_incremental(
        self,
        end_date: datetime,
        feature_views: Optional[List[str]] = None,
    ) -> None:
        """
        Materialize incremental new data from the offline store into the online store.

        This method loads incremental new feature data up to the specified end time from either
        the specified feature views, or all feature views if none are specified,
        into the online store where it is available for online serving. The start time of
        the interval materialized is either the most recent end time of a prior materialization or
        (now - ttl) if no such prior materialization exists.

        Args:
            end_date (datetime): End date for time range of data to materialize into the online store
            feature_views (List[str]): Optional list of feature view names. If selected, will only run
                materialization for the specified feature views.

        Examples:
            Materialize all features into the online store up to 5 minutes ago.

            >>> from datetime import datetime, timedelta
            >>> from feast.feature_store import FeatureStore
            >>> from feast.repo_config import RepoConfig
            >>>
            >>> fs = FeatureStore(config=RepoConfig(provider="gcp", registry="gs://my-fs/", project="my_fs_proj"))
            >>> fs.materialize_incremental(end_date=datetime.utcnow() - timedelta(minutes=5))
        """

        feature_views_to_materialize = []
        if feature_views is None:
            feature_views_to_materialize = self._registry.list_feature_views(
                self.project)
        else:
            for name in feature_views:
                feature_view = self._registry.get_feature_view(
                    name, self.project)
                feature_views_to_materialize.append(feature_view)

        _print_materialization_log(
            None,
            end_date,
            len(feature_views_to_materialize),
            self.config.online_store.type,
        )
        # TODO paging large loads
        for feature_view in feature_views_to_materialize:
            start_date = feature_view.most_recent_end_time
            if start_date is None:
                if feature_view.ttl is None:
                    raise Exception(
                        f"No start time found for feature view {feature_view.name}. materialize_incremental() requires"
                        f" either a ttl to be set or for materialize() to have been run at least once."
                    )
                start_date = datetime.utcnow() - feature_view.ttl
            provider = self._get_provider()
            print(
                f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}"
                f" from {Style.BRIGHT + Fore.GREEN}{start_date.replace(microsecond=0).astimezone()}{Style.RESET_ALL}"
                f" to {Style.BRIGHT + Fore.GREEN}{end_date.replace(microsecond=0).astimezone()}{Style.RESET_ALL}:"
            )

            def tqdm_builder(length):
                return tqdm(total=length, ncols=100)

            start_date = utils.make_tzaware(start_date)
            end_date = utils.make_tzaware(end_date)

            provider.materialize_single_feature_view(
                feature_view,
                start_date,
                end_date,
                self._registry,
                self.project,
                tqdm_builder,
            )

            self._registry.apply_materialization(feature_view, self.project,
                                                 start_date, end_date)
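The start-time rule above, standalone: resume from the feature view's most recent materialized end time, or fall back to (now - ttl) when nothing has been materialized yet. The variable names here are illustrative, not Feast API:

from datetime import datetime, timedelta

most_recent_end_time = None  # no prior materialization recorded
ttl = timedelta(hours=3)

start_date = most_recent_end_time
if start_date is None:
    if ttl is None:
        raise Exception("materialize_incremental() needs a ttl or a prior run")
    start_date = datetime.utcnow() - ttl  # fall back to (now - ttl)
print(start_date)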
Example #20
def convert_timestamp_records_to_utc(records, column):
    for record in records:
        record[column] = utils.make_tzaware(record[column]).astimezone(utc)
    return records