Example #1
File: sort.py Project: stjordanis/hpat
def parallel_sort(key_arrs, data, ascending=True):
    n_local = len(key_arrs[0])
    n_total = hpat.distributed_api.dist_reduce(n_local,
                                               np.int32(Reduce_Type.Sum.value))

    n_pes = hpat.distributed_api.get_size()
    my_rank = hpat.distributed_api.get_rank()

    # similar to Spark's sample computation in Partitioner.scala
    sampleSize = min(samplePointsPerPartitionHint * n_pes, MIN_SAMPLES)

    fraction = min(sampleSize / max(n_total, 1), 1.0)
    n_loc_samples = min(math.ceil(fraction * n_local), n_local)
    inds = np.random.randint(0, n_local, n_loc_samples)
    samples = key_arrs[0][inds]
    # print(sampleSize, fraction, n_local, n_loc_samples, len(samples))

    all_samples = hpat.distributed_api.gatherv(samples)
    all_samples = to_string_list(all_samples)
    bounds = empty_like_type(n_pes - 1, all_samples)

    if my_rank == MPI_ROOT:
        all_samples.sort()
        if not ascending:
            all_samples = all_samples[::-1]
        n_samples = len(all_samples)
        step = math.ceil(n_samples / n_pes)
        for i in range(n_pes - 1):
            bounds[i] = all_samples[min((i + 1) * step, n_samples - 1)]
        # print(bounds)

    bounds = str_list_to_array(bounds)
    bounds = hpat.distributed_api.prealloc_str_for_bcast(bounds)
    hpat.distributed_api.bcast(bounds)

    # calc send/recv counts
    pre_shuffle_meta = alloc_pre_shuffle_metadata(key_arrs, data, n_pes, True)
    node_id = 0
    for i in range(n_local):
        val = key_arrs[0][i]
        # TODO: refactor
        if node_id < (n_pes - 1) and (ascending and val >= bounds[node_id] or
                                      (not ascending)
                                      and val <= bounds[node_id]):
            node_id += 1
        update_shuffle_meta(pre_shuffle_meta, node_id, i, (val, ),
                            getitem_arr_tup(data, i), True)

    shuffle_meta = finalize_shuffle_meta(key_arrs, data, pre_shuffle_meta,
                                         n_pes, True)

    # shuffle
    recvs = alltoallv_tup(key_arrs + data, shuffle_meta)
    out_key = _get_keys_tup(recvs, key_arrs)
    out_data = _get_data_tup(recvs, key_arrs)

    return out_key, out_data
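The splitter bounds above are computed only on the root rank from the gathered samples and then broadcast. The sketch below reproduces that root-rank step as a standalone NumPy function (hypothetical name compute_bounds_sketch, no MPI involved), assuming the gathered samples fit in one array.

import math
import numpy as np

def compute_bounds_sketch(all_samples, n_pes, ascending=True):
    # Sort the gathered samples and pick n_pes - 1 evenly spaced splitters,
    # mirroring the root-rank block above.
    all_samples = np.sort(all_samples)
    if not ascending:
        all_samples = all_samples[::-1]
    n_samples = len(all_samples)
    step = math.ceil(n_samples / n_pes)
    bounds = np.empty(n_pes - 1, all_samples.dtype)
    for i in range(n_pes - 1):
        bounds[i] = all_samples[min((i + 1) * step, n_samples - 1)]
    return bounds

print(compute_bounds_sketch(np.array([5, 1, 9, 3, 7, 2]), 3))  # [3 7]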
Example #2
def local_merge_new(left_key, right_key, data_left, data_right):
    # sort-merge join of two locally sorted key arrays; the output buffers
    # start small and are grown as needed by the copy_elem_buff* helpers
    curr_size = 101 + min(len(left_key), len(right_key)) // 10
    out_left_key = empty_like_type(curr_size, left_key)
    out_data_left = alloc_arr_tup(curr_size, data_left)
    out_data_right = alloc_arr_tup(curr_size, data_right)

    out_ind = 0
    left_ind = 0
    right_ind = 0

    while left_ind < len(left_key) and right_ind < len(right_key):
        if left_key[left_ind] == right_key[right_ind]:
            out_left_key = copy_elem_buff(out_left_key, out_ind, left_key[left_ind])
            l_data_val = getitem_arr_tup(data_left, left_ind)
            out_data_left = copy_elem_buff_tup(out_data_left, out_ind, l_data_val)
            r_data_val = getitem_arr_tup(data_right, right_ind)
            out_data_right = copy_elem_buff_tup(out_data_right, out_ind, r_data_val)

            out_ind += 1
            left_run = left_ind + 1
            while left_run < len(left_key) and left_key[left_run] == right_key[right_ind]:
                out_left_key = copy_elem_buff(out_left_key, out_ind, left_key[left_run])
                l_data_val = getitem_arr_tup(data_left, left_run)
                out_data_left = copy_elem_buff_tup(out_data_left, out_ind, l_data_val)
                r_data_val = getitem_arr_tup(data_right, right_ind)
                out_data_right = copy_elem_buff_tup(out_data_right, out_ind, r_data_val)

                out_ind += 1
                left_run += 1
            right_run = right_ind + 1
            while right_run < len(right_key) and right_key[right_run] == left_key[left_ind]:
                out_left_key = copy_elem_buff(out_left_key, out_ind, left_key[left_ind])
                l_data_val = getitem_arr_tup(data_left, left_ind)
                out_data_left = copy_elem_buff_tup(out_data_left, out_ind, l_data_val)
                r_data_val = getitem_arr_tup(data_right, right_run)
                out_data_right = copy_elem_buff_tup(out_data_right, out_ind, r_data_val)

                out_ind += 1
                right_run += 1
            left_ind += 1
            right_ind += 1
        elif left_key[left_ind] < right_key[right_ind]:
            left_ind += 1
        else:
            right_ind += 1

    #out_left_key = out_left_key[:out_ind]
    out_left_key = trim_arr(out_left_key, out_ind)

    # keys are equal on every match, so the right-key output is a copy
    out_right_key = out_left_key.copy()
    out_data_left = trim_arr_tup(out_data_left, out_ind)
    out_data_right = trim_arr_tup(out_data_right, out_ind)

    return out_left_key, out_right_key, out_data_left, out_data_right
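The matching loop above is a sort-merge inner join: on an equal key it emits the pair, then walks the run of duplicates on the left side against the current right element and the run of duplicates on the right side against the current left element, which together cover the full cross product of a duplicate group. A keys-only, plain-Python sketch of the same matching logic (hypothetical helper, no output buffers or data columns):

def merge_join_keys_sketch(left_key, right_key):
    # Emit the left key once per matched pair, following the same
    # duplicate-run handling as local_merge_new above.
    out = []
    li = ri = 0
    while li < len(left_key) and ri < len(right_key):
        if left_key[li] == right_key[ri]:
            out.append(left_key[li])
            lr = li + 1
            while lr < len(left_key) and left_key[lr] == right_key[ri]:
                out.append(left_key[lr])
                lr += 1
            rr = ri + 1
            while rr < len(right_key) and right_key[rr] == left_key[li]:
                out.append(left_key[li])
                rr += 1
            li += 1
            ri += 1
        elif left_key[li] < right_key[ri]:
            li += 1
        else:
            ri += 1
    return out

print(merge_join_keys_sketch([1, 2, 2, 3], [2, 2, 3, 4]))  # [2, 2, 2, 2, 3]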
Example #3
def parallel_sort(key_arr, data):
    n_local = len(key_arr)
    n_total = hpat.distributed_api.dist_reduce(n_local,
                                               np.int32(Reduce_Type.Sum.value))

    n_pes = hpat.distributed_api.get_size()
    my_rank = hpat.distributed_api.get_rank()

    # similar to Spark's sample computation in Partitioner.scala
    sampleSize = min(samplePointsPerPartitionHint * n_pes, MIN_SAMPLES)

    fraction = min(sampleSize / max(n_total, 1), 1.0)
    n_loc_samples = min(math.ceil(fraction * n_local), n_local)
    inds = np.random.randint(0, n_local, n_loc_samples)
    samples = key_arr[inds]
    # print(sampleSize, fraction, n_local, n_loc_samples, len(samples))

    all_samples = hpat.distributed_api.gatherv(samples)
    all_samples = to_string_list(all_samples)
    bounds = empty_like_type(n_pes - 1, all_samples)

    if my_rank == MPI_ROOT:
        all_samples.sort()
        n_samples = len(all_samples)
        step = math.ceil(n_samples / n_pes)
        for i in range(n_pes - 1):
            bounds[i] = all_samples[min((i + 1) * step, n_samples - 1)]
        # print(bounds)

    bounds = str_list_to_array(bounds)
    bounds = hpat.distributed_api.prealloc_str_for_bcast(bounds)
    hpat.distributed_api.bcast(bounds)

    # calc send/recv counts
    shuffle_meta = alloc_shuffle_metadata(key_arr, n_pes, True)
    data_shuffle_meta = data_alloc_shuffle_metadata(data, n_pes, True)
    node_id = 0
    for i in range(n_local):
        val = key_arr[i]
        if node_id < (n_pes - 1) and val >= bounds[node_id]:
            node_id += 1
        update_shuffle_meta(shuffle_meta, node_id, i, val)
        update_data_shuffle_meta(data_shuffle_meta, node_id, i, data)

    finalize_shuffle_meta(key_arr, shuffle_meta, True)
    finalize_data_shuffle_meta(data, data_shuffle_meta, shuffle_meta, True)

    # shuffle
    alltoallv(key_arr, shuffle_meta)
    out_data = alltoallv_tup(data, data_shuffle_meta, shuffle_meta)

    return shuffle_meta.out_arr, out_data
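After the bounds are broadcast, the send-count loop assigns every local row to a destination rank. Because node_id only moves forward, the loop assumes the local keys are already sorted in ascending order; the standalone sketch below (hypothetical assign_ranks_sketch, no shuffle metadata) makes that assignment explicit.

import numpy as np

def assign_ranks_sketch(keys, bounds):
    # Walk locally sorted keys and bump the destination rank whenever a key
    # reaches the next splitter bound, mirroring the send-count loop above.
    node_id = 0
    dest = np.empty(len(keys), np.int64)
    for i, val in enumerate(keys):
        if node_id < len(bounds) and val >= bounds[node_id]:
            node_id += 1
        dest[i] = node_id
    return dest

print(assign_ranks_sketch([1, 2, 3, 5, 7, 9], np.array([3, 7])))  # [0 0 1 1 2 2]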
Example #4
def gatherv_impl(data):
    rank = hpat.distributed_api.get_rank()
    n_loc = len(data)
    recv_counts = gather_scalar(np.int32(n_loc))
    n_total = recv_counts.sum()
    all_data = empty_like_type(n_total, data)
    # displacements
    displs = np.empty(1, np.int32)
    if rank == MPI_ROOT:
        displs = hpat.hiframes.join.calc_disp(recv_counts)
    c_gatherv(data.ctypes, np.int32(n_loc), all_data.ctypes,
              recv_counts.ctypes, displs.ctypes, np.int32(typ_val))
    return all_data
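calc_disp here comes from hpat.hiframes.join and supplies the receive displacements for the variable-size gather. Assuming it computes the usual exclusive prefix sum of the receive counts, a minimal standalone sketch (hypothetical calc_disp_sketch) would look like this:

import numpy as np

def calc_disp_sketch(recv_counts):
    # Exclusive prefix sum of the per-rank receive counts: rank i's chunk
    # starts right after the data of all earlier ranks.
    displs = np.zeros(len(recv_counts), np.int32)
    for i in range(1, len(recv_counts)):
        displs[i] = displs[i - 1] + recv_counts[i - 1]
    return displs

print(calc_disp_sketch(np.array([3, 1, 4], np.int32)))  # [0 3 4]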
Example #5
def local_merge_asof(left_key, right_key, data_left, data_right):
    # adapted from pandas/_libs/join_func_helper.pxi
    l_size = len(left_key)
    r_size = len(right_key)

    out_left_key = empty_like_type(l_size, left_key)
    out_right_key = empty_like_type(l_size, right_key)
    out_data_left = alloc_arr_tup(l_size, data_left)
    out_data_right = alloc_arr_tup(l_size, data_right)

    left_ind = 0
    right_ind = 0

    for left_ind in range(l_size):
        # restart right_ind if it went negative in a previous iteration
        if right_ind < 0:
            right_ind = 0

        # find last position in right whose value is less than or equal to left's
        while right_ind < r_size and right_key[right_ind] <= left_key[left_ind]:
            right_ind += 1

        right_ind -= 1

        out_left_key[left_ind] = left_key[left_ind]
        # TODO: copy_tup
        setitem_arr_tup(out_data_left, left_ind,
                        getitem_arr_tup(data_left, left_ind))

        if right_ind >= 0:
            out_right_key[left_ind] = right_key[right_ind]
            setitem_arr_tup(out_data_right, left_ind,
                            getitem_arr_tup(data_right, right_ind))
        else:
            setitem_arr_nan(out_right_key, left_ind)
            setitem_arr_tup_nan(out_data_right, left_ind)

    return out_left_key, out_right_key, out_data_left, out_data_right
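Per output row, the backward as-of match above takes the last right key that is less than or equal to the left key, or fills NaN when none exists. Assuming both key sequences are sorted (as the merge above requires), the same per-row semantics can be sketched with binary search on plain lists (hypothetical helper, None standing in for the NaN fill):

import bisect

def asof_backward_sketch(left_key, right_key):
    # For each left key, take the last right key that is <= it, or None
    # when no such key exists (the NaN branch above).
    out = []
    for lv in left_key:
        pos = bisect.bisect_right(right_key, lv) - 1
        out.append(right_key[pos] if pos >= 0 else None)
    return out

print(asof_backward_sketch([1, 5, 10], [2, 3, 7]))  # [None, 3, 7]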
Example #6
    def ensureCapacity(self, minCapacity):
        if self.tmpLength < minCapacity:
            # Compute smallest power of 2 > minCapacity
            newSize = minCapacity
            newSize |= newSize >> 1
            newSize |= newSize >> 2
            newSize |= newSize >> 4
            newSize |= newSize >> 8
            newSize |= newSize >> 16
            newSize += 1

            if newSize < 0:  # Not bloody likely!
                newSize = minCapacity
            else:
                newSize = min(newSize, self.aLength >> 1)

            self.tmp = empty_like_type(newSize, self.key_arr)
            self.tmp_data = alloc_arr_tup(newSize, self.data)
            self.tmpLength = newSize

        return self.tmp
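The shift-and-or sequence in ensureCapacity smears the highest set bit of minCapacity into every lower bit, so adding one yields the smallest power of two strictly greater than the input; the newSize < 0 branch only guards against fixed-width integer overflow, as in the Java TimSort this appears to be ported from. A standalone sketch of the trick for 32-bit-sized inputs:

def next_power_of_two_sketch(n):
    # Smear the highest set bit of n into every lower bit, then add one:
    # the result is the smallest power of two strictly greater than n.
    n |= n >> 1
    n |= n >> 2
    n |= n >> 4
    n |= n >> 8
    n |= n >> 16
    return n + 1

print([next_power_of_two_sketch(n) for n in (5, 8, 100)])  # [8, 16, 128]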
Example #7
    def __init__(self, key_arr, aLength, data):
        self.key_arr = key_arr
        self.data = data
        self.aLength = aLength

        # This controls when we get *into* galloping mode.  It is initialized
        # to MIN_GALLOP.  The mergeLo and mergeHi methods nudge it higher for
        # random data, and lower for highly structured data.
        self.minGallop = MIN_GALLOP

        arr_len = aLength
        # Allocate temp storage (which may be increased later if necessary)
        self.tmpLength = arr_len >> 1 if arr_len < 2 * INITIAL_TMP_STORAGE_LENGTH else INITIAL_TMP_STORAGE_LENGTH
        self.tmp = empty_like_type(self.tmpLength, self.key_arr)
        self.tmp_data = alloc_arr_tup(self.tmpLength, data)

        # A stack of pending runs yet to be merged.  Run i starts at
        # address base[i] and extends for len[i] elements.  It's always
        # true (so long as the indices are in bounds) that:
        #
        #    runBase[i] + runLen[i] == runBase[i + 1]
        #
        # so we could cut the storage for this, but it's a minor amount,
        # and keeping all the info explicit simplifies the code.

        # Allocate runs-to-be-merged stack (which cannot be expanded).  The
        # stack length requirements are described in listsort.txt.  The C
        # version always uses the same stack length (85), but this was
        # measured to be too expensive when sorting "mid-sized" arrays (e.g.,
        # 100 elements) in Java.  Therefore, we use smaller (but sufficiently
        # large) stack lengths for smaller arrays.  The "magic numbers" in the
        # computation below must be changed if MIN_MERGE is decreased.  See
        # the MIN_MERGE declaration above for more information.

        self.stackSize = 0  # Number of pending runs on stack
        stackLen = 5 if arr_len < 120 else (10 if arr_len < 1542 else
                                            (19 if arr_len < 119151 else 40))
        self.runBase = np.empty(stackLen, np.int64)
        self.runLen = np.empty(stackLen, np.int64)
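The run stack documented in the comments stores each pending run as a (base, length) pair, with runBase[i] + runLen[i] == runBase[i + 1] holding for consecutive runs. A tiny sketch of pushing runs onto such a stack (hypothetical push_run_sketch, arrays sized like runBase/runLen above):

import numpy as np

def push_run_sketch(run_base, run_len, stack_size, base, length):
    # Record a pending run: run i starts at run_base[i] and spans run_len[i]
    # elements, so run_base[i] + run_len[i] == run_base[i + 1] for adjacent runs.
    run_base[stack_size] = base
    run_len[stack_size] = length
    return stack_size + 1

run_base = np.empty(5, np.int64)
run_len = np.empty(5, np.int64)
size = 0
size = push_run_sketch(run_base, run_len, size, 0, 32)
size = push_run_sketch(run_base, run_len, size, 32, 17)
print(run_base[:size], run_len[:size])  # [ 0 32] [32 17]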