def write(
    self,
    feature_set: FeatureSet,
    dataframe: DataFrame,
    spark_client: SparkClient,
):
    """Loads the data from a feature set into the Historical Feature Store.

    Args:
        feature_set: object processed with feature set information.
        dataframe: Spark dataframe containing data from a feature set.
        spark_client: client for Spark connections with external services.

    If debug_mode is set to True, a temporary view named
    historical_feature_store__{feature_set.name} is created instead of
    writing to the real Historical Feature Store.

    """
    dataframe = self._create_partitions(dataframe)

    if self.debug_mode:
        spark_client.create_temporary_view(
            dataframe=dataframe,
            name=f"historical_feature_store__{feature_set.name}",
        )
        return

    s3_key = os.path.join("historical", feature_set.entity, feature_set.name)
    spark_client.write_table(
        dataframe=dataframe,
        database=self.database,
        table_name=feature_set.name,
        partition_by=self.PARTITION_BY,
        **self.db_config.get_options(s3_key),
    )
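A minimal usage sketch of the debug path follows. It is illustrative only: the import paths, the HistoricalFeatureStoreWriter constructor, and the pre-built feature_set/dataframe objects are assumptions about the surrounding library (they may differ between versions), not something this snippet confirms.

# Hedged usage sketch; the import paths and names below are assumptions.
from butterfree.clients import SparkClient
from butterfree.load.writers import HistoricalFeatureStoreWriter

spark_client = SparkClient()
writer = HistoricalFeatureStoreWriter(debug_mode=True)

# With debug_mode=True, write() only registers the temporary view
# "historical_feature_store__<feature_set.name>" and returns early,
# so nothing reaches the real Historical Feature Store.
writer.write(
    feature_set=feature_set,  # assumed: a previously built FeatureSet
    dataframe=dataframe,      # assumed: the feature set's transformed DataFrame
    spark_client=spark_client,
)

# The staged data can then be inspected with Spark SQL, e.g.:
# SELECT * FROM historical_feature_store__<feature_set.name>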
def test_write_table_with_invalid_params(self, database, table_name, path):
    # given a dataframe argument that is not a valid Spark DataFrame
    df_writer = "not a spark df writer"

    # then writing must fail with a ValueError
    with pytest.raises(ValueError):
        SparkClient.write_table(
            dataframe=df_writer,
            database=database,
            table_name=table_name,
            path=path,
        )
def test_write_table(
    self, format, mode, database, table_name, path, mocked_spark_write
):
    # given
    name = "{}.{}".format(database, table_name)

    # when
    SparkClient.write_table(
        dataframe=mocked_spark_write,
        database=database,
        table_name=table_name,
        format_=format,
        mode=mode,
        path=path,
    )

    # then
    mocked_spark_write.saveAsTable.assert_called_with(
        mode=mode, format=format, partitionBy=None, name=name, path=path
    )
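For context, a minimal sketch of the behavior these two tests pin down. It is inferred from the assertions, not taken from SparkClient itself; the validation rule and the exact delegation to Spark's DataFrameWriter are assumptions.

from pyspark.sql import DataFrame


def write_table_sketch(
    dataframe, database, table_name, path, format_=None, mode=None, partition_by=None
):
    # Assumed validation: reject anything that is not a Spark DataFrame,
    # which would explain the ValueError expected in
    # test_write_table_with_invalid_params (the mocked fixture in
    # test_write_table would then have to pass this check, e.g. a mock
    # spec'd to DataFrame).
    if not isinstance(dataframe, DataFrame):
        raise ValueError("dataframe must be a pyspark.sql.DataFrame instance")

    # Fully qualified table name, matching the "{database}.{table_name}"
    # value asserted in test_write_table.
    name = "{}.{}".format(database, table_name)

    # Delegate to Spark's DataFrameWriter with the same keyword arguments
    # the mocked writer expects to receive.
    dataframe.write.saveAsTable(
        mode=mode, format=format_, partitionBy=partition_by, name=name, path=path
    )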