def add_col_prefix(
    prefix: str,
    df: sparkDataFrame,
) -> sparkDataFrame:
    """Return ``df`` with ``prefix`` prepended to every column name.

    All columns are renamed positionally in a single ``toDF`` call instead of
    one ``withColumnRenamed`` call per column. Renaming by name one column at
    a time is order-dependent: when a freshly prefixed name equals a column
    that has not been processed yet (e.g. columns ``a`` and ``xa`` with prefix
    ``x``), the later step matches both columns and one of them ends up
    prefixed twice.

    :param prefix: text placed in front of each existing column name
    :param df: the Spark data frame whose columns are renamed
    :return: a data frame with the same columns in the same order, each
        renamed to ``prefix`` followed by its previous name
    """
    # Positional renaming: the i-th new name always applies to the i-th column.
    return df.toDF(*(f'{prefix}{col}' for col in df.columns))
def unpack_df_col(
    df: sparkDataFrame,
    col_name: str,
) -> List:
    """Collect the values of a single column into a Python list.

    The column is first renamed to the fixed name ``col_to_extract`` so that
    every collected row can be read through that attribute, whatever the
    column was originally called.

    :param df: the Spark data frame to read from
    :param col_name: name of the column whose values are wanted
    :return: the column's values, one list element per collected row
    """
    renamed = df.withColumnRenamed(col_name, 'col_to_extract')
    collected_rows = renamed.select('col_to_extract').collect()
    return [record.col_to_extract for record in collected_rows]
def rename(df: DataFrame, columns: dict) -> DataFrame:
    """
    Rename columns of a data frame.

    Entries are applied one after another in the dict's iteration order, so a
    later entry operates on the result of the earlier ones.

    :param df: the input data frame
    :param columns: mapping with the old column name as key and the new
        column name as value, e.g. {'old_name': 'new_name'}
    :return: the data frame after renaming
    """
    result = df
    for current_name, replacement in columns.items():
        result = result.withColumnRenamed(current_name, replacement)
    return result
def rename_column_name(
    df: SparkDataFrame,
    name_set_list: List[Tuple[str, str]],
) -> SparkDataFrame:
    """Apply a sequence of column renames to a Spark data frame.

    The pairs are applied in list order, each one on the result of the
    previous rename.

    :param df: the spark data frame whose columns are to be renamed
    :param name_set_list: list of tuples, each holding the old name followed
        by the new name
    :return: the spark data frame after all renames
    """
    renamed = df
    for name_pair in name_set_list:
        renamed = renamed.withColumnRenamed(name_pair[0], name_pair[1])
    return renamed
def normalize_column_name(df: SparkDataFrame, normalize_func: callable) -> SparkDataFrame:
    """Rename every column to the result of ``normalize_func`` on its name.

    The function is typically a snake_case converter, but any callable that
    maps a column name to a new name works. All columns are renamed
    positionally in one ``toDF`` call instead of one ``withColumnRenamed``
    call per column: renaming by name step by step goes wrong when the
    normalized form of one column equals the original name of a column
    processed later, because that later step then renames both.

    :param df: the spark data frame with the column names to be normalized
    :param normalize_func: called once per column, in column order, with the
        current column name; returns the new column name
    :return: the spark data frame with normalized column names
    """
    # Positional renaming keeps each new name tied to its own column.
    return df.toDF(*(normalize_func(name) for name in df.columns))