示例#1
0
def repartition_sort_df(
    dataframe: DataFrame,
    partition_by: List[str],
    order_by: List[str],
    num_processors: int = None,
    num_partitions: int = None,
):
    """Repartition a DataFrame, then sort the rows inside each partition.

    The partition count is resolved by ``_num_partitions_definition`` from
    ``num_processors`` and ``num_partitions``; the actual repartitioning is
    delegated to ``repartition_df``. The result is ordered with
    ``sortWithinPartitions``, i.e. per partition rather than globally.

    Args:
        dataframe: Spark DataFrame to reorganize.
        partition_by: names of the columns used to repartition the data.
        order_by: names of the columns used to sort within each partition.
        num_processors: number of processors, forwarded to
            ``_num_partitions_definition``.
        num_partitions: number of partitions, forwarded to
            ``_num_partitions_definition``.

    Returns:
        The repartitioned DataFrame with each partition sorted by
        ``order_by``.

    """
    # Resolve the effective partition count before touching the data.
    partition_count = _num_partitions_definition(num_processors, num_partitions)
    repartitioned = repartition_df(dataframe, partition_by, partition_count)

    # Sorting is local to each partition; no global ordering is implied.
    sorted_within_partitions = repartitioned.sortWithinPartitions(*order_by)
    return sorted_within_partitions