def write(
        self, feature_set: FeatureSet, dataframe: DataFrame, spark_client: SparkClient,
    ):
        """Loads the data from a feature set into the Historical Feature Store.

        The dataframe is first passed through ``self._create_partitions``.
        If ``self.debug_mode`` is set to True, a temporary view with a name in
        the format ``historical_feature_store__{feature_set.name}`` is created
        and nothing is written to the real historical feature store.

        Args:
            feature_set: object processed with feature set information.
            dataframe: spark dataframe containing data from a feature set.
            spark_client: client for spark connections with external services.

        """
        # Function-scope import so the block does not depend on the file header.
        import posixpath

        dataframe = self._create_partitions(dataframe)

        if self.debug_mode:
            spark_client.create_temporary_view(
                dataframe=dataframe,
                name=f"historical_feature_store__{feature_set.name}",
            )
            return

        # Build the key with "/" separators regardless of the host OS;
        # os.path.join would emit backslashes when running on Windows.
        s3_key = posixpath.join("historical", feature_set.entity, feature_set.name)
        spark_client.write_table(
            dataframe=dataframe,
            database=self.database,
            table_name=feature_set.name,
            partition_by=self.PARTITION_BY,
            **self.db_config.get_options(s3_key),
        )
示例#2
0
    def test_create_temporary_view(self, target_df, spark_session):
        """A dataframe registered as a temporary view can be read back unchanged.

        Registers ``target_df`` under the name ``temp_view`` through a
        ``SparkClient``, loads that name with ``spark_session.table`` and
        compares both dataframes with ``assert_dataframe_equality``.
        """
        # arrange
        view_name = "temp_view"
        client = SparkClient()

        # act
        client.create_temporary_view(target_df, view_name)
        view_df = spark_session.table(view_name)

        # assert
        assert_dataframe_equality(target_df, view_df)
 def _write_in_debug_mode(feature_set: FeatureSet, dataframe: DataFrame,
                          spark_client: SparkClient):
     """Creates a temporary view instead of writing to the real data source.

     Args:
         feature_set: object whose ``name`` is used to build the view name,
             in the format ``online_feature_store__{feature_set.name}``.
         dataframe: dataframe to expose through the temporary view.
         spark_client: client whose ``create_temporary_view`` is called.

     Returns:
         Whatever ``spark_client.create_temporary_view`` returns.
     """
     view_name = f"online_feature_store__{feature_set.name}"
     return spark_client.create_temporary_view(dataframe=dataframe, name=view_name)