def test_append_dataframe(self):
    """Check that two successive appends to one HDFS path add up row-wise."""
    target = self.path_ + '/test_df.parquet'

    # First append: the test expects a single row when the path is read back.
    first_batch = utils.create_dataframe([Row(column1=1, column2=2)], schema=None)
    utils.append(first_batch, target)
    self.assertEqual(utils.read_files_from_HDFS(target).count(), 1)

    # Second append to the same path: the test expects the count to reach two.
    second_batch = utils.create_dataframe([Row(column1=3, column2=4)], schema=None)
    utils.append(second_batch, target)
    self.assertEqual(utils.read_files_from_HDFS(target).count(), 2)
def test_append_dataframe(self):
    """Check that two successive appends to one HDFS path add up row-wise.

    The target path is 'test_df.parquet' joined onto config.HDFS_CLUSTER_URI.
    """
    target = os.path.join(config.HDFS_CLUSTER_URI, 'test_df.parquet')

    # Each entry is (column1, column2, row count expected after this append).
    rounds = ((1, 2, 1), (3, 4, 2))
    for first_value, second_value, expected_rows in rounds:
        batch = utils.create_dataframe(
            Row(column1=first_value, column2=second_value), schema=None
        )
        utils.append(batch, target)
        stored = utils.read_files_from_HDFS(target)
        self.assertEqual(stored.count(), expected_rows)
def save_dataframe_metadata_to_HDFS(metadata):
    """Append the given metadata to the dataframe stored at path.MODEL_METADATA.

    The metadata is converted with schema.convert_model_metadata_to_row and
    wrapped in a dataframe that uses schema.model_metadata_schema before it
    is appended.

    NOTE(review): on either failure this logs the error and then calls
    sys.exit(-1), so the exception never reaches the caller. Confirm that
    terminating the process is intended here.
    """
    row = schema.convert_model_metadata_to_row(metadata)

    # Step 1: wrap the row in a dataframe.
    try:
        metadata_df = utils.create_dataframe(row, schema.model_metadata_schema)
    except DataFrameNotCreatedException as exc:
        current_app.logger.error(str(exc), exc_info=True)
        sys.exit(-1)

    # Step 2: append that dataframe at the metadata location.
    try:
        utils.append(metadata_df, path.MODEL_METADATA)
    except DataFrameNotAppendedException as exc:
        current_app.logger.error(str(exc), exc_info=True)
        sys.exit(-1)
def save_dataframe_metadata_to_hdfs(metadata):
    """Append dataframe metadata to the dataframe at path.DATAFRAME_METADATA.

    Args:
        metadata: value passed to schema.convert_dataframe_metadata_to_row.

    Raises:
        DataFrameNotCreatedException: re-raised after logging when the
            dataframe cannot be built from the converted row.
        DataFrameNotAppendedException: re-raised after logging when the
            append fails.
    """
    row = schema.convert_dataframe_metadata_to_row(metadata)

    # Build a dataframe from the converted row.
    try:
        metadata_df = utils.create_dataframe(
            row,
            schema.dataframe_metadata_schema,
        )
    except DataFrameNotCreatedException as exc:
        current_app.logger.error(str(exc), exc_info=True)
        raise

    # Append it at the shared dataframe-metadata location.
    try:
        utils.append(metadata_df, path.DATAFRAME_METADATA)
    except DataFrameNotAppendedException as exc:
        current_app.logger.error(str(exc), exc_info=True)
        raise
def save_dataframe_metadata_to_hdfs(metadata: dict, df_metadata_path: str):
    """Append dataframe metadata to the dataframe at the given path.

    Args:
        metadata (dict): metadata to convert into a row and append.
        df_metadata_path (str): location the metadata dataframe is appended to.

    Raises:
        DataFrameNotCreatedException: re-raised after logging when the
            dataframe cannot be built from the converted row.
        DataFrameNotAppendedException: re-raised after logging when the
            append fails.
    """
    row = schema.convert_dataframe_metadata_to_row(metadata)

    # Build a dataframe from the converted row.
    try:
        metadata_df = utils.create_dataframe(
            row,
            schema.dataframe_metadata_schema,
        )
    except DataFrameNotCreatedException as exc:
        current_app.logger.error(str(exc), exc_info=True)
        raise

    # Append it at the caller-supplied location.
    try:
        utils.append(metadata_df, df_metadata_path)
    except DataFrameNotAppendedException as exc:
        current_app.logger.error(str(exc), exc_info=True)
        raise
def save_model_metadata_to_hdfs(metadata):
    """Append model metadata to the dataframe at path.MODEL_METADATA.

    Args:
        metadata: dict containing model metadata.

    Raises:
        DataFrameNotCreatedException: re-raised after logging when the
            dataframe cannot be built from the converted row.
        DataFrameNotAppendedException: re-raised after logging when the
            append fails.
    """
    row = schema.convert_model_metadata_to_row(metadata)

    # Build a dataframe from the converted row.
    try:
        metadata_df = utils.create_dataframe(row, schema.model_metadata_schema)
    except DataFrameNotCreatedException as exc:
        current_app.logger.error(str(exc), exc_info=True)
        raise

    # Append it at the model-metadata location, logging before and after.
    try:
        current_app.logger.info('Saving model metadata...')
        utils.append(metadata_df, path.MODEL_METADATA)
        current_app.logger.info('Model metadata saved...')
    except DataFrameNotAppendedException as exc:
        current_app.logger.error(str(exc), exc_info=True)
        raise