Exemplo n.º 1
0
 def _save_offline_dataframe(
     self,
     feature_group,
     dataframe,
     operation,
     write_options,
     validation_id=None,
 ):
     """Append ``dataframe`` to the offline storage of ``feature_group``.

     When ``feature_group.time_travel_format`` is ``"HUDI"`` the write is
     delegated to ``HudiEngine.save_hudi_fg``. Otherwise the dataframe is
     appended to the table named by ``feature_group._get_table_name()``
     using the ``self.HIVE_FORMAT`` writer.

     Args:
         feature_group: Feature group whose offline table receives the data.
         dataframe: Dataframe exposing the Spark ``write`` interface.
         operation: Forwarded to ``save_hudi_fg``; unused on the Hive path.
         write_options: Mapping of writer options. Forwarded to
             ``save_hudi_fg`` or expanded into ``DataFrameWriter.options``.
         validation_id: Forwarded to ``save_hudi_fg``; unused on the Hive
             path.
     """
     if feature_group.time_travel_format == "HUDI":
         # NOTE(review): the Spark context is passed before the Spark session
         # here, matching every other HudiEngine construction in this file
         # (previously this call passed them in the opposite order). Confirm
         # against the HudiEngine constructor signature.
         hudi_engine_instance = hudi_engine.HudiEngine(
             feature_group.feature_store_id,
             feature_group.feature_store_name,
             feature_group,
             self._spark_context,
             self._spark_session,
         )
         hudi_engine_instance.save_hudi_fg(
             dataframe,
             self.APPEND,
             operation,
             write_options,
             validation_id,
         )
     else:
         writer = (
             dataframe.write.format(self.HIVE_FORMAT)
             .mode(self.APPEND)
             .options(**write_options)
         )
         # A falsy partition key (None or empty) means "no partitioning".
         partition_columns = (
             feature_group.partition_key if feature_group.partition_key else []
         )
         writer.partitionBy(partition_columns).saveAsTable(
             feature_group._get_table_name()
         )
 def commit_delete(feature_group, delete_df, write_options):
     """Delete the records in ``delete_df`` from ``feature_group`` via Hudi.

     Builds a ``HudiEngine`` bound to the feature group and to the Spark
     context/session of the current engine instance, then returns whatever
     ``HudiEngine.delete_record`` returns.

     Args:
         feature_group: Feature group the records are removed from.
         delete_df: Dataframe handed to ``delete_record``.
         write_options: Options handed to ``delete_record``.
     """
     return hudi_engine.HudiEngine(
         feature_group.feature_store_id,
         feature_group.feature_store_name,
         feature_group,
         engine.get_instance()._spark_context,
         engine.get_instance()._spark_session,
     ).delete_record(delete_df, write_options)
Exemplo n.º 3
0
 def register_hudi_temporary_table(self, hudi_fg_alias, feature_store_id,
                                   feature_store_name, read_options):
     """Register the feature group behind ``hudi_fg_alias`` as a temporary table.

     Args:
         hudi_fg_alias: Object providing ``feature_group``, ``alias`` and the
             ``left_feature_group_start_timestamp`` /
             ``left_feature_group_end_timestamp`` attributes.
         feature_store_id: Passed to the ``HudiEngine`` constructor.
         feature_store_name: Passed to the ``HudiEngine`` constructor.
         read_options: Passed to ``HudiEngine.register_temporary_table``.
     """
     hudi = hudi_engine.HudiEngine(
         feature_store_id,
         feature_store_name,
         hudi_fg_alias.feature_group,
         self._spark_context,
         self._spark_session,
     )
     # The table is registered under the alias, with the alias' start/end
     # timestamps forwarded as given.
     hudi.register_temporary_table(
         hudi_fg_alias.alias,
         hudi_fg_alias.left_feature_group_start_timestamp,
         hudi_fg_alias.left_feature_group_end_timestamp,
         read_options,
     )
Exemplo n.º 4
0
 def commit_details(self, feature_group, limit):
     """Return commit statistics for ``feature_group`` keyed by commit id.

     Args:
         feature_group: Feature group whose commits are looked up.
         limit: Forwarded unchanged to the feature group API's
             ``commit_details`` call.

     Returns:
         dict: Maps each commit's ``commitid`` to a dict with the keys
         ``"committedOn"`` (the commit id converted by
         ``HudiEngine._timestamp_to_hudiformat``), ``"rowsUpdated"``,
         ``"rowsInserted"`` and ``"rowsDeleted"``.
     """
     hudi = hudi_engine.HudiEngine(
         feature_group.feature_store_id,
         feature_group.feature_store_name,
         feature_group,
         engine.get_instance()._spark_context,
         engine.get_instance()._spark_session,
     )
     commits = self._feature_group_api.commit_details(feature_group, limit)
     return {
         commit.commitid: {
             "committedOn": hudi._timestamp_to_hudiformat(commit.commitid),
             "rowsUpdated": commit.rows_updated,
             "rowsInserted": commit.rows_inserted,
             "rowsDeleted": commit.rows_deleted,
         }
         for commit in commits
     }