示例#1
0
 def sdc_array_equal_str_arr_impl(A, B):
     is_index_equal = (len(A) == len(B)
                       and num_total_chars(A) == num_total_chars(B))
     for i in numpy.arange(len(A)):
         if (A[i] != B[i]
                 or str_arr_is_na(A, i) is not str_arr_is_na(B, i)):
             return False
     return is_index_equal
 def _sdc_str_arr_operator_comp_binop_impl(self, other):
     if len(self) != len(other):
         raise ValueError("Mismatch of String Arrays sizes in operator.comp_binop")
     n = len(self)
     out_list = [False] * n
     for i in numba.prange(n):
         out_list[i] = (self[i] < other[i]
                        and not (str_arr_is_na(self, i) or str_arr_is_na(other, i)))
     return out_list
示例#3
0
        def sdc_check_indexes_equal_string_impl(A, B):
            # TODO: replace with StringArrays comparison
            is_index_equal = (len(A) == len(B)
                              and num_total_chars(A) == num_total_chars(B))
            for i in numpy.arange(len(A)):
                if (A[i] != B[i]
                        or str_arr_is_na(A, i) is not str_arr_is_na(B, i)):
                    return False

            return is_index_equal
示例#4
0
 def _sdc_str_arr_operator_comp_binop_impl(self, other):
     n = len(other)
     out_list = [False] * n
     for i in numba.prange(n):
         out_list[i] = (self < other[i]
                        and not (str_arr_is_na(other, i)))
     return out_list
示例#5
0
        def sdc_join_series_indexes_impl(left, right):

            # allocate result arrays
            lsize = len(left)
            rsize = len(right)
            est_total_size = int(1.1 * (lsize + rsize))

            lidx = numpy.empty(est_total_size, numpy.int64)
            ridx = numpy.empty(est_total_size, numpy.int64)

            # use Series.sort_values since argsort for StringArrays not implemented
            original_left_series = pandas.Series(left)
            original_right_series = pandas.Series(right)

            # sort arrays saving the old positions
            left_series = original_left_series.sort_values(kind='mergesort')
            right_series = original_right_series.sort_values(kind='mergesort')
            sorted_left = left_series._index
            sorted_right = right_series._index

            i, j, k = 0, 0, 0
            while (i < lsize and j < rsize):
                lidx = _hpat_ensure_array_capacity(k + 1, lidx)
                ridx = _hpat_ensure_array_capacity(k + 1, ridx)

                left_index = left[sorted_left[i]]
                right_index = right[sorted_right[j]]

                if (left_index < right_index):
                    lidx[k] = sorted_left[i]
                    ridx[k] = -1
                    i += 1
                    k += 1
                elif (left_index > right_index):
                    lidx[k] = -1
                    ridx[k] = sorted_right[j]
                    j += 1
                    k += 1
                else:
                    # find ends of sequences of equal index values in left and right
                    ni, nj = i, j
                    while (ni < lsize and left[sorted_left[ni]] == left_index):
                        ni += 1
                    while (nj < rsize
                           and right[sorted_right[nj]] == right_index):
                        nj += 1

                    # join the blocks found into results
                    for s in numpy.arange(i, ni, 1):
                        block_size = nj - j
                        to_lidx = numpy.repeat(sorted_left[s], block_size)
                        to_ridx = numpy.array(
                            [sorted_right[k] for k in numpy.arange(j, nj, 1)],
                            numpy.int64)

                        lidx = _hpat_ensure_array_capacity(
                            k + block_size, lidx)
                        ridx = _hpat_ensure_array_capacity(
                            k + block_size, ridx)

                        lidx[k:k + block_size] = to_lidx
                        ridx[k:k + block_size] = to_ridx
                        k += block_size
                    i = ni
                    j = nj

            # fill the end of joined with remaining part of left or right
            if i < lsize:
                block_size = lsize - i
                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                ridx[k:k + block_size] = numpy.repeat(-1, block_size)
                while i < lsize:
                    lidx[k] = sorted_left[i]
                    i += 1
                    k += 1

            elif j < rsize:
                block_size = rsize - j
                lidx = _hpat_ensure_array_capacity(k + block_size, lidx)
                ridx = _hpat_ensure_array_capacity(k + block_size, ridx)
                lidx[k:k + block_size] = numpy.repeat(-1, block_size)
                while j < rsize:
                    ridx[k] = sorted_right[j]
                    j += 1
                    k += 1

            # count total number of characters and allocate joined array
            total_joined_size = k
            num_chars_in_joined = 0
            for i in numpy.arange(total_joined_size):
                if lidx[i] != -1:
                    num_chars_in_joined += len(left[lidx[i]])
                elif ridx[i] != -1:
                    num_chars_in_joined += len(right[ridx[i]])

            joined = pre_alloc_string_array(total_joined_size,
                                            num_chars_in_joined)

            # iterate over joined and fill it with indexes using lidx and ridx indexers
            for i in numpy.arange(total_joined_size):
                if lidx[i] != -1:
                    joined[i] = left[lidx[i]]
                    if (str_arr_is_na(left, lidx[i])):
                        str_arr_set_na(joined, i)
                elif ridx[i] != -1:
                    joined[i] = right[ridx[i]]
                    if (str_arr_is_na(right, ridx[i])):
                        str_arr_set_na(joined, i)
                else:
                    str_arr_set_na(joined, i)

            return joined, lidx, ridx