def write(
        self, feature_set: FeatureSet, dataframe: DataFrame, spark_client: SparkClient,
    ):
        """Load a feature set's dataframe into the Historical Feature Store.

        Args:
            feature_set: object processed with feature_set informations.
            dataframe: spark dataframe containing data from a feature set.
            spark_client: client for spark connections with external services.

        When debug_mode is on, nothing is persisted: the dataframe is instead
        exposed as a temporary view named
        historical_feature_store__{feature_set.name}.
        """
        partitioned = self._create_partitions(dataframe)

        # Debug mode short-circuits the real write with a temporary view.
        if self.debug_mode:
            view_name = f"historical_feature_store__{feature_set.name}"
            spark_client.create_temporary_view(
                dataframe=partitioned, name=view_name,
            )
            return

        # Persist under historical/<entity>/<name> using the db_config options.
        key = os.path.join("historical", feature_set.entity, feature_set.name)
        options = self.db_config.get_options(key)
        spark_client.write_table(
            dataframe=partitioned,
            database=self.database,
            table_name=feature_set.name,
            partition_by=self.PARTITION_BY,
            **options,
        )
# --- Example #2 ---
    def test_create_temporary_view(self, target_df: DataFrame,
                                   spark_session: SparkSession) -> None:
        """A temp view registered via SparkClient should round-trip the dataframe."""
        # arrange
        client = SparkClient()

        # act: register the dataframe under a view name, then read it back
        client.create_temporary_view(target_df, "temp_view")
        recovered = spark_session.table("temp_view")

        # assert
        assert_dataframe_equality(target_df, recovered)
    def write(
        self,
        feature_set: FeatureSet,
        dataframe: DataFrame,
        spark_client: SparkClient,
    ) -> None:
        """Load a feature set's dataframe into the Historical Feature Store.

        Args:
            feature_set: object processed with feature_set informations.
            dataframe: spark dataframe containing data from a feature set.
            spark_client: client for spark connections with external services.

        When debug_mode is on, nothing is persisted: the dataframe is instead
        exposed as a temporary view named
        historical_feature_store__{feature_set.name}.

        Raises:
            RuntimeError: in interval mode, when the Spark session's
                partitionOverwriteMode is not configured to "dynamic".
        """
        prepared = self._create_partitions(dataframe)
        prepared = self._apply_transformations(prepared)

        # Incremental (interval) loads rely on dynamic partition overwrite;
        # fail fast if the session is configured otherwise.
        if self.interval_mode:
            overwrite_mode = spark_client.conn.conf.get(
                "spark.sql.sources.partitionOverwriteMode").lower()
            if overwrite_mode != "dynamic":
                raise RuntimeError(
                    "m=load_incremental_table, "
                    "spark.sql.sources.partitionOverwriteMode={}, "
                    "msg=partitionOverwriteMode have to "
                    "be configured to 'dynamic'".format(overwrite_mode))

        # Debug mode short-circuits the real write with a temporary view.
        if self.debug_mode:
            view_name = f"historical_feature_store__{feature_set.name}"
            spark_client.create_temporary_view(
                dataframe=prepared, name=view_name,
            )
            return

        # Persist under historical/<entity>/<name> using the db_config options.
        key = os.path.join("historical", feature_set.entity, feature_set.name)
        spark_client.write_table(
            dataframe=prepared,
            database=self.database,
            table_name=feature_set.name,
            partition_by=self.PARTITION_BY,
            **self.db_config.get_options(key),
        )
# --- Example #4 ---
 def _write_in_debug_mode(
         table_name: str, dataframe: DataFrame,
         spark_client: SparkClient) -> Union[StreamingQuery, None]:
     """Expose the dataframe as a temp view instead of writing to the real store.

     Returns whatever create_temporary_view returns — per the annotation,
     a StreamingQuery or None.
     """
     view_name = f"online_feature_store__{table_name}"
     return spark_client.create_temporary_view(dataframe=dataframe, name=view_name)