示例#1
0
def add_col_prefix(
        prefix: str,
        df: sparkDataFrame,
) -> sparkDataFrame:
    """Return ``df`` with ``prefix`` prepended to every column name.

    All columns are renamed positionally in a single ``toDF`` call instead of
    a chain of ``withColumnRenamed`` calls. A chained rename goes wrong when a
    freshly prefixed name equals a column that has not been processed yet
    (e.g. columns ``a`` and ``xa`` with prefix ``x``): the later rename
    matches by name and would hit both columns.

    :param prefix: text placed in front of each existing column name
    :param df: the Spark data frame whose columns are renamed
    :return: a data frame with the same columns, in the same order, renamed
    """
    return df.toDF(*[f'{prefix}{col}' for col in df.columns])
示例#2
0
def unpack_df_col(
        df: sparkDataFrame,
        col_name: str,
) -> List:
    """Collect the values of one column into a Python list.

    The column is first renamed to the fixed name ``col_to_extract`` so that
    each collected row exposes the value under a known attribute name.

    :param df: the Spark data frame to read from
    :param col_name: name of the column whose values are wanted
    :return: the column's values, one entry per collected row
    """
    alias = 'col_to_extract'
    collected_rows = (
        df.withColumnRenamed(col_name, alias)
        .select(alias)
        .collect()
    )
    return [getattr(row, alias) for row in collected_rows]
示例#3
0
def rename(df: DataFrame, columns: dict) -> DataFrame:
    """
    Rename columns of a data frame.

    The renames are applied one at a time, in the dict's iteration order, so
    a new name that is also a later key is renamed again by that later entry.

    :param df: the input data frame
    :param columns: mapping with old column names as keys and new column
        names as values, e.g. {'old_name': 'new_name'}
    :return: the data frame after renaming
    """
    result = df
    for name_pair in columns.items():
        # name_pair is (old_name, new_name)
        result = result.withColumnRenamed(*name_pair)
    return result
示例#4
0
def rename_column_name(df: SparkDataFrame,
                       name_set_list: List[Tuple[str, str]]) -> SparkDataFrame:
    """Apply a sequence of column renames to a data frame.

    The pairs are processed in list order, each rename acting on the result
    of the previous one.

    :param df: the spark data frame whose columns should be renamed
    :param name_set_list: (old name, new name) tuples, applied in order
    :return: the spark data frame after all renames
    """
    renamed = df
    for name_pair in name_set_list:
        renamed = renamed.withColumnRenamed(name_pair[0], name_pair[1])
    return renamed
示例#5
0
def normalize_column_name(df: SparkDataFrame,
                          normalize_func: callable) -> SparkDataFrame:
    """Rename every column to the name produced by ``normalize_func``.

    ``normalize_func`` is called once per column name (e.g. a snake_case
    converter) and all columns are renamed positionally in a single ``toDF``
    call. Renaming positionally avoids the pitfall of chained
    ``withColumnRenamed`` calls, where a normalized name that equals a
    not-yet-processed column name causes both columns to be renamed again.

    :param df: the spark data frame with the column names to be normalized
    :param normalize_func: function mapping an existing column name to its
        normalized name
    :return: the spark data frame with normalized column names, column order
        unchanged
    """
    return df.toDF(*[normalize_func(name) for name in df.columns])