Exemplo n.º 1
0
def apply_total(repo_config: RepoConfig, repo_path: Path):
    os.chdir(repo_path)
    sys.path.append("")

    project = repo_config.project
    registry = Registry(repo_config.metadata_store)
    repo = parse_repo(repo_path)

    for entity in repo.entities:
        registry.apply_entity(entity, project=project)

    repo_table_names = set(t.name for t in repo.feature_tables)

    for t in repo.feature_views:
        repo_table_names.add(t.name)

    tables_to_delete = []
    for registry_table in registry.list_feature_tables(project=project):
        if registry_table.name not in repo_table_names:
            tables_to_delete.append(registry_table)

    views_to_delete = []
    for registry_view in registry.list_feature_views(project=project):
        if registry_view.name not in repo_table_names:
            views_to_delete.append(registry_view)

    # Delete tables that should not exist
    for registry_table in tables_to_delete:
        registry.delete_feature_table(registry_table.name, project=project)

    # Create tables that should
    for table in repo.feature_tables:
        registry.apply_feature_table(table, project)

    # Delete views that should not exist
    for registry_view in views_to_delete:
        registry.delete_feature_view(registry_view.name, project=project)

    # Create views that should
    for view in repo.feature_views:
        registry.apply_feature_view(view, project)

    infra_provider = get_provider(repo_config)

    all_to_delete: List[Union[FeatureTable, FeatureView]] = []
    all_to_delete.extend(tables_to_delete)
    all_to_delete.extend(views_to_delete)

    all_to_keep: List[Union[FeatureTable, FeatureView]] = []
    all_to_keep.extend(repo.feature_tables)
    all_to_keep.extend(repo.feature_views)

    infra_provider.update_infra(
        project,
        tables_to_delete=all_to_delete,
        tables_to_keep=all_to_keep,
        partial=False,
    )

    print("Done!")
Exemplo n.º 2
0
def s3_registry():
    registry_config = RegistryConfig(
        path=
        f"s3://feast-integration-tests/registries/{int(time.time() * 1000)}/registry.db",
        cache_ttl_seconds=600,
    )
    return Registry(registry_config, None)
Exemplo n.º 3
0
    def __init__(
        self, repo_path: Optional[str] = None, config: Optional[RepoConfig] = None,
    ):
        self.repo_path = repo_path
        if repo_path is not None and config is not None:
            raise ValueError("You cannot specify both repo_path and config")
        if config is not None:
            self.config = config
        elif repo_path is not None:
            self.config = load_repo_config(Path(repo_path))
        else:
            self.config = RepoConfig(
                registry="./registry.db",
                project="default",
                provider="local",
                online_store=OnlineStoreConfig(
                    local=LocalOnlineStoreConfig(path="online_store.db")
                ),
            )

        registry_config = self.config.get_registry_config()
        self._registry = Registry(
            registry_path=registry_config.path,
            cache_ttl=timedelta(seconds=registry_config.cache_ttl_seconds),
        )
        self._tele = Telemetry()
Exemplo n.º 4
0
    def __init__(
        self,
        repo_path: Optional[str] = None,
        config: Optional[RepoConfig] = None,
    ):
        """
        Creates a FeatureStore object.

        Raises:
            ValueError: If both or neither of repo_path and config are specified.
        """
        if repo_path is not None and config is not None:
            raise ValueError("You cannot specify both repo_path and config.")
        if config is not None:
            self.repo_path = Path(os.getcwd())
            self.config = config
        elif repo_path is not None:
            self.repo_path = Path(repo_path)
            self.config = load_repo_config(Path(repo_path))
        else:
            raise ValueError("Please specify one of repo_path or config.")

        registry_config = self.config.get_registry_config()
        self._registry = Registry(
            registry_path=registry_config.path,
            repo_path=self.repo_path,
            cache_ttl=timedelta(seconds=registry_config.cache_ttl_seconds),
        )
Exemplo n.º 5
0
def apply_total(repo_config: RepoConfig, repo_path: Path):
    os.chdir(repo_path)
    sys.path.append("")

    project = repo_config.project
    registry = Registry(repo_config.metadata_store)
    repo = parse_repo(repo_path)

    for entity in repo.entities:
        registry.apply_entity(entity, project=project)

    repo_table_names = set(t.name for t in repo.feature_tables)
    tables_to_delete = []
    for registry_table in registry.list_feature_tables(project=project):
        if registry_table.name not in repo_table_names:
            tables_to_delete.append(registry_table)

    # Delete tables that should not exist
    for registry_table in tables_to_delete:
        registry.delete_feature_table(registry_table.name, project=project)

    for table in repo.feature_tables:
        registry.apply_feature_table(table, project)

    infra_provider = get_provider(repo_config)
    infra_provider.update_infra(project,
                                tables_to_delete=tables_to_delete,
                                tables_to_keep=repo.feature_tables)

    print("Done!")
Exemplo n.º 6
0
    def __init__(
        self, repo_path: Optional[str] = None, config: Optional[RepoConfig] = None,
    ):
        """ Initializes a new FeatureStore object. Used to manage a feature store.

        Args:
            repo_path: Path to a `feature_store.yaml` used to configure the feature store
            config (RepoConfig): Configuration object used to configure the feature store
        """
        if repo_path is not None and config is not None:
            raise ValueError("You cannot specify both repo_path and config")
        if config is not None:
            self.repo_path = Path(os.getcwd())
            self.config = config
        elif repo_path is not None:
            self.repo_path = Path(repo_path)
            self.config = load_repo_config(Path(repo_path))
        else:
            raise ValueError("Please specify one of repo_path or config")

        registry_config = self.config.get_registry_config()
        self._registry = Registry(
            registry_path=registry_config.path,
            repo_path=self.repo_path,
            cache_ttl=timedelta(seconds=registry_config.cache_ttl_seconds),
        )
        self._tele = Telemetry()
Exemplo n.º 7
0
def registry_dump(repo_config: RepoConfig, repo_path: Path):
    """For debugging only: output contents of the metadata registry"""
    registry_config = repo_config.get_registry_config()
    project = repo_config.project
    registry = Registry(registry_config=registry_config, repo_path=repo_path)
    registry_dict = registry.to_dict(project=project)

    click.echo(json.dumps(registry_dict, indent=2, sort_keys=True))
Exemplo n.º 8
0
def teardown(repo_config: RepoConfig, repo_path: Path):
    registry = Registry(repo_config.metadata_store)
    project = repo_config.project
    registry_tables: List[Union[FeatureTable, FeatureView]] = []
    registry_tables.extend(registry.list_feature_tables(project=project))
    registry_tables.extend(registry.list_feature_views(project=project))
    infra_provider = get_provider(repo_config)
    infra_provider.teardown_infra(project, tables=registry_tables)
Exemplo n.º 9
0
def registry_dump(repo_config: RepoConfig, repo_path: Path):
    """ For debugging only: output contents of the metadata registry """
    registry_config = repo_config.get_registry_config()
    project = repo_config.project
    registry = Registry(registry_config=registry_config, repo_path=repo_path)

    for entity in registry.list_entities(project=project):
        print(entity)
    for feature_view in registry.list_feature_views(project=project):
        print(feature_view)
Exemplo n.º 10
0
def registry_dump(repo_config: RepoConfig):
    """ For debugging only: output contents of the metadata registry """

    project = repo_config.project
    registry = Registry(repo_config.metadata_store)

    for entity in registry.list_entities(project=project):
        print(entity)
    for table in registry.list_feature_tables(project=project):
        print(table)
Exemplo n.º 11
0
def gcs_registry():
    from google.cloud import storage

    storage_client = storage.Client()
    bucket_name = f"feast-registry-test-{int(time.time() * 1000)}"
    bucket = storage_client.bucket(bucket_name)
    bucket = storage_client.create_bucket(bucket)
    bucket.add_lifecycle_delete_rule(
        age=14)  # delete buckets automatically after 14 days
    bucket.patch()
    bucket.blob("registry.db")
    return Registry(f"gs://{bucket_name}/registry.db", None, timedelta(600))
Exemplo n.º 12
0
def teardown(repo_config: RepoConfig, repo_path: Path):
    registry_config = repo_config.get_registry_config()
    registry = Registry(
        registry_path=registry_config.path,
        cache_ttl=timedelta(seconds=registry_config.cache_ttl_seconds),
    )
    project = repo_config.project
    registry_tables: List[Union[FeatureTable, FeatureView]] = []
    registry_tables.extend(registry.list_feature_tables(project=project))
    registry_tables.extend(registry.list_feature_views(project=project))
    infra_provider = get_provider(repo_config)
    infra_provider.teardown_infra(project, tables=registry_tables)
Exemplo n.º 13
0
def registry_dump(repo_config: RepoConfig):
    """ For debugging only: output contents of the metadata registry """
    registry_config = repo_config.get_registry_config()
    project = repo_config.project
    registry = Registry(
        registry_path=registry_config.path,
        cache_ttl=timedelta(seconds=registry_config.cache_ttl_seconds),
    )

    for entity in registry.list_entities(project=project):
        print(entity)
    for table in registry.list_feature_tables(project=project):
        print(table)
Exemplo n.º 14
0
def gcs_registry() -> Registry:
    from google.cloud import storage

    storage_client = storage.Client()
    bucket_name = f"feast-registry-test-{int(time.time() * 1000)}"
    bucket = storage_client.bucket(bucket_name)
    bucket = storage_client.create_bucket(bucket)
    bucket.add_lifecycle_delete_rule(
        age=14)  # delete buckets automatically after 14 days
    bucket.patch()
    bucket.blob("registry.db")
    registry_config = RegistryConfig(path=f"gs://{bucket_name}/registry.db",
                                     cache_ttl_seconds=600)
    return Registry(registry_config, None)
Exemplo n.º 15
0
def registry_dump(repo_config: RepoConfig, repo_path: Path):
    """ For debugging only: output contents of the metadata registry """
    from colorama import Fore, Style

    registry_config = repo_config.get_registry_config()
    project = repo_config.project
    registry = Registry(registry_config=registry_config, repo_path=repo_path)
    registry_dict = registry.to_dict(project=project)

    warning = (
        "Warning: The registry-dump command is for debugging only and may contain "
        "breaking changes in the future. No guarantees are made on this interface."
    )
    click.echo(f"{Style.BRIGHT}{Fore.YELLOW}{warning}{Style.RESET_ALL}")
    click.echo(json.dumps(registry_dict, indent=2))
Exemplo n.º 16
0
    def refresh_registry(self):
        """Fetches and caches a copy of the feature registry in memory.

        Explicitly calling this method allows for direct control of the state of the registry cache. Every time this
        method is called the complete registry state will be retrieved from the remote registry store backend
        (e.g., GCS, S3), and the cache timer will be reset. If refresh_registry() is run before get_online_features()
        is called, then get_online_feature() will use the cached registry instead of retrieving (and caching) the
        registry itself.

        Additionally, the TTL for the registry cache can be set to infinity (by setting it to 0), which means that
        refresh_registry() will become the only way to update the cached registry. If the TTL is set to a value
        greater than 0, then once the cache becomes stale (more time than the TTL has passed), a new cache will be
        downloaded synchronously, which may increase latencies if the triggering method is get_online_features()
        """
        registry_config = self.config.get_registry_config()
        self._registry = Registry(registry_config, repo_path=self.repo_path)
        self._registry.refresh()
Exemplo n.º 17
0
    def __init__(
        self, repo_path: Optional[str] = None, config: Optional[RepoConfig] = None,
    ):
        if repo_path is not None and config is not None:
            raise ValueError("You cannot specify both repo_path and config")
        if config is not None:
            self.repo_path = Path(os.getcwd())
            self.config = config
        elif repo_path is not None:
            self.repo_path = Path(repo_path)
            self.config = load_repo_config(Path(repo_path))
        else:
            raise ValueError("Please specify one of repo_path or config")

        registry_config = self.config.get_registry_config()
        self._registry = Registry(
            registry_path=registry_config.path,
            repo_path=self.repo_path,
            cache_ttl=timedelta(seconds=registry_config.cache_ttl_seconds),
        )
Exemplo n.º 18
0
 def _registry(self):
     if self._registry_impl is None:
         self._registry_impl = Registry(self._config.get(opt.REGISTRY_PATH))
     return self._registry_impl
Exemplo n.º 19
0
def apply_total(repo_config: RepoConfig, repo_path: Path,
                skip_source_validation: bool):
    from colorama import Fore, Style

    os.chdir(repo_path)
    registry_config = repo_config.get_registry_config()
    project = repo_config.project
    if not is_valid_name(project):
        print(
            f"{project} is not valid. Project name should only have "
            f"alphanumerical values and underscores but not start with an underscore."
        )
        sys.exit(1)
    registry = Registry(
        registry_path=registry_config.path,
        repo_path=repo_path,
        cache_ttl=timedelta(seconds=registry_config.cache_ttl_seconds),
    )
    registry._initialize_registry()
    sys.dont_write_bytecode = True
    repo = parse_repo(repo_path)
    _validate_feature_views(repo.feature_views)
    data_sources = [t.batch_source for t in repo.feature_views]

    if not skip_source_validation:
        # Make sure the data source used by this feature view is supported by Feast
        for data_source in data_sources:
            data_source.validate(repo_config)

    # Make inferences
    update_entities_with_inferred_types_from_feature_views(
        repo.entities, repo.feature_views, repo_config)
    update_data_sources_with_inferred_event_timestamp_col(
        data_sources, repo_config)
    for view in repo.feature_views:
        view.infer_features_from_batch_source(repo_config)

    repo_table_names = set(t.name for t in repo.feature_tables)

    for t in repo.feature_views:
        repo_table_names.add(t.name)

    tables_to_delete = []
    for registry_table in registry.list_feature_tables(project=project):
        if registry_table.name not in repo_table_names:
            tables_to_delete.append(registry_table)

    views_to_delete = []
    for registry_view in registry.list_feature_views(project=project):
        if registry_view.name not in repo_table_names:
            views_to_delete.append(registry_view)

    sys.dont_write_bytecode = False
    for entity in repo.entities:
        registry.apply_entity(entity, project=project, commit=False)
        click.echo(
            f"Registered entity {Style.BRIGHT + Fore.GREEN}{entity.name}{Style.RESET_ALL}"
        )

    # Delete tables that should not exist
    for registry_table in tables_to_delete:
        registry.delete_feature_table(registry_table.name,
                                      project=project,
                                      commit=False)
        click.echo(
            f"Deleted feature table {Style.BRIGHT + Fore.GREEN}{registry_table.name}{Style.RESET_ALL} from registry"
        )

    # Create tables that should
    for table in repo.feature_tables:
        registry.apply_feature_table(table, project, commit=False)
        click.echo(
            f"Registered feature table {Style.BRIGHT + Fore.GREEN}{table.name}{Style.RESET_ALL}"
        )

    # Delete views that should not exist
    for registry_view in views_to_delete:
        registry.delete_feature_view(registry_view.name,
                                     project=project,
                                     commit=False)
        click.echo(
            f"Deleted feature view {Style.BRIGHT + Fore.GREEN}{registry_view.name}{Style.RESET_ALL} from registry"
        )

    # Create views that should exist
    for view in repo.feature_views:
        registry.apply_feature_view(view, project, commit=False)
        click.echo(
            f"Registered feature view {Style.BRIGHT + Fore.GREEN}{view.name}{Style.RESET_ALL}"
        )
    registry.commit()

    apply_feature_services(registry, project, repo)

    infra_provider = get_provider(repo_config, repo_path)

    all_to_delete: List[Union[FeatureTable, FeatureView]] = []
    all_to_delete.extend(tables_to_delete)
    all_to_delete.extend(views_to_delete)

    all_to_keep: List[Union[FeatureTable, FeatureView]] = []
    all_to_keep.extend(repo.feature_tables)
    all_to_keep.extend(repo.feature_views)

    entities_to_delete: List[Entity] = []
    repo_entities_names = set([e.name for e in repo.entities])
    for registry_entity in registry.list_entities(project=project):
        if registry_entity.name not in repo_entities_names:
            entities_to_delete.append(registry_entity)

    entities_to_keep: List[Entity] = repo.entities

    for name in [view.name for view in repo.feature_tables
                 ] + [table.name for table in repo.feature_views]:
        click.echo(
            f"Deploying infrastructure for {Style.BRIGHT + Fore.GREEN}{name}{Style.RESET_ALL}"
        )
    for name in [view.name for view in views_to_delete
                 ] + [table.name for table in tables_to_delete]:
        click.echo(
            f"Removing infrastructure for {Style.BRIGHT + Fore.GREEN}{name}{Style.RESET_ALL}"
        )

    infra_provider.update_infra(
        project,
        tables_to_delete=all_to_delete,
        tables_to_keep=all_to_keep,
        entities_to_delete=entities_to_delete,
        entities_to_keep=entities_to_keep,
        partial=False,
    )
Exemplo n.º 20
0
 def _get_registry(self) -> Registry:
     return Registry(self.config.metadata_store)
Exemplo n.º 21
0
def teardown(repo_config: RepoConfig, repo_path: Path):
    registry = Registry(repo_config.metadata_store)
    project = repo_config.project
    registry_tables = registry.list_feature_tables(project=project)
    infra_provider = get_provider(repo_config)
    infra_provider.teardown_infra(project, tables=registry_tables)
Exemplo n.º 22
0
def test_commit():
    fd, registry_path = mkstemp()
    registry_config = RegistryConfig(path=registry_path, cache_ttl_seconds=600)
    test_registry = Registry(registry_config, None)

    entity = Entity(
        name="driver_car_id",
        description="Car driver id",
        value_type=ValueType.STRING,
        labels={"team": "matchmaking"},
    )

    project = "project"

    # Register Entity without commiting
    test_registry.apply_entity(entity, project, commit=False)

    # Retrieving the entity should still succeed
    entities = test_registry.list_entities(project, allow_cache=True)

    entity = entities[0]
    assert (len(entities) == 1 and entity.name == "driver_car_id"
            and entity.value_type == ValueType(ValueProto.ValueType.STRING)
            and entity.description == "Car driver id"
            and "team" in entity.labels
            and entity.labels["team"] == "matchmaking")

    entity = test_registry.get_entity("driver_car_id",
                                      project,
                                      allow_cache=True)
    assert (entity.name == "driver_car_id"
            and entity.value_type == ValueType(ValueProto.ValueType.STRING)
            and entity.description == "Car driver id"
            and "team" in entity.labels
            and entity.labels["team"] == "matchmaking")

    # Create new registry that points to the same store
    registry_with_same_store = Registry(registry_config, None)

    # Retrieving the entity should fail since the store is empty
    entities = registry_with_same_store.list_entities(project)
    assert len(entities) == 0

    # commit from the original registry
    test_registry.commit()

    # Reconstruct the new registry in order to read the newly written store
    registry_with_same_store = Registry(registry_config, None)

    # Retrieving the entity should now succeed
    entities = registry_with_same_store.list_entities(project)

    entity = entities[0]
    assert (len(entities) == 1 and entity.name == "driver_car_id"
            and entity.value_type == ValueType(ValueProto.ValueType.STRING)
            and entity.description == "Car driver id"
            and "team" in entity.labels
            and entity.labels["team"] == "matchmaking")

    entity = test_registry.get_entity("driver_car_id", project)
    assert (entity.name == "driver_car_id"
            and entity.value_type == ValueType(ValueProto.ValueType.STRING)
            and entity.description == "Car driver id"
            and "team" in entity.labels
            and entity.labels["team"] == "matchmaking")

    test_registry.teardown()

    # Will try to reload registry, which will fail because the file has been deleted
    with pytest.raises(FileNotFoundError):
        test_registry._get_registry_proto()
Exemplo n.º 23
0
def local_registry():
    fd, registry_path = mkstemp()
    registry_config = RegistryConfig(path=registry_path, cache_ttl_seconds=600)
    return Registry(registry_config, None)
Exemplo n.º 24
0
def s3_registry():
    return Registry(
        f"s3://feast-integration-tests/registries/{int(time.time() * 1000)}/registry.db",
        None,
        timedelta(600),
    )
Exemplo n.º 25
0
def apply_total(repo_config: RepoConfig, repo_path: Path):
    from colorama import Fore, Style

    os.chdir(repo_path)
    sys.path.append("")
    registry_config = repo_config.get_registry_config()
    project = repo_config.project
    registry = Registry(
        registry_path=registry_config.path,
        repo_path=repo_path,
        cache_ttl=timedelta(seconds=registry_config.cache_ttl_seconds),
    )
    registry._initialize_registry()
    sys.dont_write_bytecode = True
    repo = parse_repo(repo_path)
    sys.dont_write_bytecode = False
    for entity in repo.entities:
        registry.apply_entity(entity, project=project)
        click.echo(
            f"Registered entity {Style.BRIGHT + Fore.GREEN}{entity.name}{Style.RESET_ALL}"
        )

    repo_table_names = set(t.name for t in repo.feature_tables)

    for t in repo.feature_views:
        repo_table_names.add(t.name)

    tables_to_delete = []
    for registry_table in registry.list_feature_tables(project=project):
        if registry_table.name not in repo_table_names:
            tables_to_delete.append(registry_table)

    views_to_delete = []
    for registry_view in registry.list_feature_views(project=project):
        if registry_view.name not in repo_table_names:
            views_to_delete.append(registry_view)

    # Delete tables that should not exist
    for registry_table in tables_to_delete:
        registry.delete_feature_table(registry_table.name, project=project)
        click.echo(
            f"Deleted feature table {Style.BRIGHT + Fore.GREEN}{registry_table.name}{Style.RESET_ALL} from registry"
        )

    # Create tables that should
    for table in repo.feature_tables:
        registry.apply_feature_table(table, project)
        click.echo(
            f"Registered feature table {Style.BRIGHT + Fore.GREEN}{registry_table.name}{Style.RESET_ALL}"
        )

    # Delete views that should not exist
    for registry_view in views_to_delete:
        registry.delete_feature_view(registry_view.name, project=project)
        click.echo(
            f"Deleted feature view {Style.BRIGHT + Fore.GREEN}{registry_view.name}{Style.RESET_ALL} from registry"
        )

    # Create views that should
    for view in repo.feature_views:
        registry.apply_feature_view(view, project)
        click.echo(
            f"Registered feature view {Style.BRIGHT + Fore.GREEN}{view.name}{Style.RESET_ALL}"
        )

    infra_provider = get_provider(repo_config, repo_path)

    all_to_delete: List[Union[FeatureTable, FeatureView]] = []
    all_to_delete.extend(tables_to_delete)
    all_to_delete.extend(views_to_delete)

    all_to_keep: List[Union[FeatureTable, FeatureView]] = []
    all_to_keep.extend(repo.feature_tables)
    all_to_keep.extend(repo.feature_views)

    for name in [view.name for view in repo.feature_tables
                 ] + [table.name for table in repo.feature_views]:
        click.echo(
            f"Deploying infrastructure for {Style.BRIGHT + Fore.GREEN}{name}{Style.RESET_ALL}"
        )
    for name in [view.name for view in views_to_delete
                 ] + [table.name for table in tables_to_delete]:
        click.echo(
            f"Removing infrastructure for {Style.BRIGHT + Fore.GREEN}{name}{Style.RESET_ALL}"
        )
    infra_provider.update_infra(
        project,
        tables_to_delete=all_to_delete,
        tables_to_keep=all_to_keep,
        partial=False,
    )
Exemplo n.º 26
0
def local_registry():
    fd, registry_path = mkstemp()
    return Registry(registry_path, None, timedelta(600))