def test_append_dataframe(self):
        """Append two one-row dataframes to one path and check the row count.

        After each append the path is read back through
        ``utils.read_files_from_HDFS``; the count must be 1 after the first
        append and 2 after the second.
        """
        target_path = self.path_ + '/test_df.parquet'

        # (column1, column2) values of the single row written in each round;
        # the expected total after a round is simply its 1-based position.
        batches = ((1, 2), (3, 4))
        for expected_rows, (first, second) in enumerate(batches, start=1):
            batch_df = utils.create_dataframe(
                [Row(column1=first, column2=second)], schema=None)
            utils.append(batch_df, target_path)
            stored_df = utils.read_files_from_HDFS(target_path)
            self.assertEqual(stored_df.count(), expected_rows)
示例#2
0
    def test_append_dataframe(self):
        """Verify that ``utils.append`` adds rows to what is already stored.

        A dataframe built from a single ``Row`` is appended to
        ``test_df.parquet`` under ``config.HDFS_CLUSTER_URI`` and read back
        (1 row expected); a second single-row dataframe is then appended to
        the same location and the read-back count must be 2.
        """
        target = os.path.join(config.HDFS_CLUSTER_URI, 'test_df.parquet')

        # Round one: the location should hold exactly the row just written.
        first_df = utils.create_dataframe(Row(column1=1, column2=2), schema=None)
        utils.append(first_df, target)
        after_first = utils.read_files_from_HDFS(target)
        self.assertEqual(after_first.count(), 1)

        # Round two: the new row must be added on top of the earlier one.
        second_df = utils.create_dataframe(Row(column1=3, column2=4), schema=None)
        utils.append(second_df, target)
        after_second = utils.read_files_from_HDFS(target)
        self.assertEqual(after_second.count(), 2)
def save_dataframe_metadata_to_HDFS(metadata):
    """ Append one metadata record to the dataframe at path.MODEL_METADATA.

        The metadata is turned into a row with
        schema.convert_model_metadata_to_row, wrapped in a dataframe that uses
        schema.model_metadata_schema and passed to utils.append.

        If creating the dataframe raises DataFrameNotCreatedException, or the
        append raises DataFrameNotAppendedException, the error is logged with
        its traceback and sys.exit(-1) is called.

        Args:
            metadata: metadata accepted by schema.convert_model_metadata_to_row.
    """
    row = schema.convert_model_metadata_to_row(metadata)

    # Step 1: wrap the row in a dataframe carrying the model metadata schema.
    try:
        metadata_df = utils.create_dataframe(row, schema.model_metadata_schema)
    except DataFrameNotCreatedException as creation_error:
        current_app.logger.error(str(creation_error), exc_info=True)
        sys.exit(-1)

    # Step 2: add it to the stored metadata dataframe (created by the append
    # helper if it does not exist yet).
    try:
        utils.append(metadata_df, path.MODEL_METADATA)
    except DataFrameNotAppendedException as append_error:
        current_app.logger.error(str(append_error), exc_info=True)
        sys.exit(-1)
示例#4
0
def save_dataframe_metadata_to_hdfs(metadata):
    """ Append one dataframe-metadata record to path.DATAFRAME_METADATA.

        Args:
            metadata: metadata accepted by
                schema.convert_dataframe_metadata_to_row.

        Raises:
            DataFrameNotCreatedException: building the one-row dataframe
                failed; logged with traceback, then re-raised.
            DataFrameNotAppendedException: the append failed; logged with
                traceback, then re-raised.
    """
    row = schema.convert_dataframe_metadata_to_row(metadata)

    # Build a dataframe holding just this row.
    try:
        metadata_df = utils.create_dataframe(
            row,
            schema.dataframe_metadata_schema,
        )
    except DataFrameNotCreatedException as creation_error:
        current_app.logger.error(str(creation_error), exc_info=True)
        raise

    # Add the row to the stored metadata dataframe (created by the append
    # helper if it does not exist yet).
    try:
        utils.append(metadata_df, path.DATAFRAME_METADATA)
    except DataFrameNotAppendedException as append_error:
        current_app.logger.error(str(append_error), exc_info=True)
        raise
示例#5
0
def save_dataframe_metadata_to_hdfs(metadata: dict, df_metadata_path: str):
    """ Append one dataframe-metadata record to the given path.

        Args:
            metadata (dict): metadata to convert into a row and append.
            df_metadata_path (str): path of the metadata dataframe to append to.

        Raises:
            DataFrameNotCreatedException: building the one-row dataframe
                failed; logged with traceback, then re-raised.
            DataFrameNotAppendedException: the append failed; logged with
                traceback, then re-raised.
    """
    row = schema.convert_dataframe_metadata_to_row(metadata)

    # Build a dataframe holding just this row.
    try:
        single_row_df = utils.create_dataframe(
            row,
            schema.dataframe_metadata_schema,
        )
    except DataFrameNotCreatedException as creation_error:
        current_app.logger.error(str(creation_error), exc_info=True)
        raise

    # Add the row to the dataframe at the requested path (created by the
    # append helper if it does not exist yet).
    try:
        utils.append(single_row_df, df_metadata_path)
    except DataFrameNotAppendedException as append_error:
        current_app.logger.error(str(append_error), exc_info=True)
        raise
示例#6
0
def save_model_metadata_to_hdfs(metadata):
    """ Append one model-metadata record to path.MODEL_METADATA.

        Args:
            metadata: dict containing model metadata.

        Raises:
            DataFrameNotCreatedException: building the one-row dataframe
                failed; logged with traceback, then re-raised.
            DataFrameNotAppendedException: the append failed; logged with
                traceback, then re-raised.
    """
    row = schema.convert_model_metadata_to_row(metadata)

    # Build a dataframe holding just this row.
    try:
        single_row_df = utils.create_dataframe(row, schema.model_metadata_schema)
    except DataFrameNotCreatedException as creation_error:
        current_app.logger.error(str(creation_error), exc_info=True)
        raise

    current_app.logger.info('Saving model metadata...')
    try:
        # Add the row to the stored model metadata dataframe (created by the
        # append helper if it does not exist yet).
        utils.append(single_row_df, path.MODEL_METADATA)
    except DataFrameNotAppendedException as append_error:
        current_app.logger.error(str(append_error), exc_info=True)
        raise
    else:
        # Only reached when the append completed without raising.
        current_app.logger.info('Model metadata saved...')