def group(rdd: RDD, **kwargs: Any) -> RDD:
    """
    Run the grouping stage of the step.

    **Must return (key, value) pairs.**

    Optional kwargs carry whatever was passed when the algorithm was
    called; this implementation does not read them.
    """
    grouped = rdd.groupByKey()
    return grouped
def group(rdd: RDD, **kwargs: Any) -> RDD:  # type: ignore
    """
    Group records by key, order the groups by key, then post-process each group.

    Args:
        rdd: Pair RDD of (key, value) records to group.
        **kwargs: Accepted so this stage can be called with the same
            arguments as the other ``group`` implementations; not used here.

    Returns:
        The RDD produced by mapping
        ``SortAndAssignLabels._sort_within_partition`` over every grouped
        element.
    """
    grouped = rdd.groupByKey().sortByKey()
    # The helper is applied per element of the grouped RDD (``map``, not
    # ``mapPartitions``). ``preservesPartitioning=True`` tells Spark to keep
    # the partitioner set up by ``sortByKey``.
    # NOTE(review): this presumes the helper leaves keys unchanged - confirm.
    return grouped.map(
        SortAndAssignLabels._sort_within_partition,
        preservesPartitioning=True,
    )
def group(rdd: RDD, **kwargs: Any) -> RDD:  # type: ignore
    """
    Group records by key and order the resulting groups by key.

    Args:
        rdd: Pair RDD of (key, value) records to group.
        **kwargs: Accepted but not used by this implementation.

    Returns:
        The grouped RDD after ``sortByKey()`` has been applied.
    """
    grouped = rdd.groupByKey()
    return grouped.sortByKey()
def group(rdd: RDD, **kwargs: Any) -> RDD:
    """
    Transform each partition with its index, then group the result by key.

    Args:
        rdd: Input RDD for the grouping stage.
        **kwargs: Accepted but not used by this implementation.

    Returns:
        The RDD obtained by applying ``SampleAndAssignBuckets.extract_idx``
        through ``mapPartitionsWithIndex`` and grouping its output by key.
    """
    # NOTE(review): ``extract_idx`` presumably emits (key, value) pairs, since
    # its output goes straight into ``groupByKey`` - confirm against the helper.
    return rdd.mapPartitionsWithIndex(
        SampleAndAssignBuckets.extract_idx,
        preservesPartitioning=True,
    ).groupByKey()