def sdc_reindex_series_impl(arr, index, name, by_index):
    """Reorder Series data ``arr`` (labelled by ``index``) to follow ``by_index``.

    ``range_indexes``, ``int64_indexes``, ``data_is_str_arr``, ``data_dtype``
    and ``index_dtype`` are free variables supplied by the enclosing scope.
    NOTE(review): the ``flag == True  # noqa`` comparisons are kept verbatim;
    presumably the compiler relies on this form to prune branches - confirm.

    Parameters
    ----------
    arr : data values, positionally aligned with ``index``
    index : current labels of ``arr``
    name : name passed through to the resulting Series
    by_index : labels defining the order of the result

    Returns
    -------
    pandas.Series
        Values of ``arr`` looked up by label for every element of
        ``by_index``, with ``by_index`` as the index and ``name`` as the name.

    Raises
    ------
    ValueError
        If ``index`` contains a duplicated label.
    IndexingError
        If at least one label of ``by_index`` is not present in ``index``.
    """

    # no reindexing is needed if indexes are equal
    if range_indexes == True:  # noqa
        equal_indexes = numpy_like.array_equal(index, by_index)
    elif int64_indexes == True:  # noqa
        equal_indexes = numpy_like.array_equal(index, by_index)
    else:
        equal_indexes = False
    if (index is by_index or equal_indexes):
        return pandas.Series(data=arr, index=by_index, name=name)

    if data_is_str_arr == True:  # noqa
        # strings are gathered into a list first; NaN flags are tracked
        # separately and applied once the string array has been built
        _res_data = [''] * len(by_index)
        res_data_nan_mask = numpy.zeros(len(by_index), dtype=types.bool_)
    else:
        _res_data = numpy.empty(len(by_index), dtype=data_dtype)

    # build a dict of self.index values to their positions:
    map_index_to_position = Dict.empty(
        key_type=index_dtype,
        value_type=types.int32
    )

    for i, value in enumerate(index):
        if value in map_index_to_position:
            raise ValueError("cannot reindex from a duplicate axis")
        else:
            map_index_to_position[value] = i

    # count labels that cannot be resolved; the error is raised after the loop
    index_mismatch = 0
    for i in numba.prange(len(by_index)):
        val = by_index[i]
        if val in map_index_to_position:
            pos_in_self = map_index_to_position[val]
            _res_data[i] = arr[pos_in_self]
            if data_is_str_arr == True:  # noqa
                # the NaN flag must come from the same source element as the
                # value copied above, i.e. position pos_in_self (not i)
                res_data_nan_mask[i] = isna(arr, pos_in_self)
        else:
            index_mismatch += 1
    if index_mismatch:
        msg = "Unalignable boolean Series provided as indexer " + \
              "(index of the boolean Series and of the indexed object do not match)."
        raise IndexingError(msg)

    if data_is_str_arr == True:  # noqa
        res_data = create_str_arr_from_list(_res_data)
        str_arr_set_na_by_mask(res_data, res_data_nan_mask)
    else:
        res_data = _res_data

    return pandas.Series(data=res_data, index=by_index, name=name)
def hpat_pandas_stringmethods_casefold_impl(self):
    """Build a Series holding the casefolded form of every string element.

    Reads the values from ``self._data._data`` and calls ``casefold()`` on
    each of them. The mask returned by ``get_nan_mask`` for the input values
    is applied to the freshly created string array through
    ``str_arr_set_na_by_mask``.

    Returns
    -------
    pandas.Series
        Result values with the index ``self._data._index`` and the name
        ``self._data._name``.
    """
    values = self._data._data
    nan_mask = get_nan_mask(values)
    n_items = len(self._data)

    # pre-sized list so every iteration writes to its own slot
    folded = [''] * n_items
    for pos in numba.prange(n_items):
        folded[pos] = values[pos].casefold()

    packed = create_str_arr_from_list(folded)
    masked = str_arr_set_na_by_mask(packed, nan_mask)

    return pandas.Series(masked, self._data._index, name=self._data._name)
def getitem_by_mask_impl(arr, idx):
    """Collect the elements of ``arr`` whose position is truthy in ``idx``.

    Works in two passes over the chunks returned by ``parallel_chunks``:
    the first pass counts the selected elements per chunk, the second pass
    writes every chunk's selection at the offset given by the counts of all
    preceding chunks, which keeps the original element order.

    ``is_str_arr``, ``is_range`` and ``res_dtype`` are free variables supplied
    by the enclosing scope.
    NOTE(review): the ``flag == True  # noqa`` comparisons are kept verbatim;
    presumably the compiler relies on this form to prune branches - confirm.

    Parameters
    ----------
    arr : source of values; read as ``arr.start + arr.step * j`` when
        ``is_range`` is set, as ``arr[j]`` otherwise
    idx : mask indexed by the same positions as ``arr``

    Returns
    -------
    A string array built with ``create_str_arr_from_list`` (with NaN flags
    applied) when ``is_str_arr`` is set, otherwise a numpy array of dtype
    ``res_dtype``.
    """
    chunks = parallel_chunks(len(arr))
    hits_per_chunk = numpy.empty(len(chunks), dtype=numpy.int64)
    total_hits = 0

    # pass 1: count selected elements in every chunk and overall
    for chunk_no in prange(len(chunks)):
        part = chunks[chunk_no]
        hits = 0
        for pos in range(part.start, part.stop):
            if idx[pos]:
                hits += 1
        total_hits += hits
        hits_per_chunk[chunk_no] = hits

    if is_str_arr == True:  # noqa
        picked = [''] * total_hits
        picked_nan_mask = numpy.empty(shape=total_hits, dtype=types.bool_)
    else:
        picked = numpy.empty(shape=total_hits, dtype=res_dtype)

    # pass 2: each chunk fills its own contiguous slice of the output
    for chunk_no in prange(len(chunks)):
        part = chunks[chunk_no]
        # output offset of this chunk = number of hits in all earlier chunks
        chunk_offset = int(sum(hits_per_chunk[0:chunk_no]))
        write_pos = chunk_offset

        for pos in range(part.start, part.stop):
            if idx[pos]:
                if is_range == True:  # noqa
                    picked_value = arr.start + arr.step * pos
                else:
                    picked_value = arr[pos]
                picked[write_pos] = picked_value
                if is_str_arr == True:  # noqa
                    picked_nan_mask[write_pos] = isna(arr, pos)
                write_pos += 1

    if is_str_arr == True:  # noqa
        picked_as_str_arr = create_str_arr_from_list(picked)
        str_arr_set_na_by_mask(picked_as_str_arr, picked_nan_mask)
        return picked_as_str_arr
    else:
        return picked
def hpat_pandas_stringmethods_upper_impl(self):
    """Build a Series holding the upper-cased form of every string element.

    Elements of ``self._data._data`` with zero length are stored unchanged;
    all others are replaced by ``item.upper()``. The mask returned by
    ``get_nan_mask`` for the input values is applied to the new string array
    through ``str_arr_set_na_by_mask``.

    Returns
    -------
    pandas.Series
        Result values with the index ``self._data._index`` and the name
        ``self._data._name``.
    """
    nan_mask = get_nan_mask(self._data._data)
    n_items = len(self._data)

    # pre-sized list so every iteration writes to its own slot
    upper_list = [''] * n_items
    for pos in numba.prange(n_items):
        text = self._data._data[pos]
        if len(text) == 0:
            # empty strings are stored back as they are
            upper_list[pos] = text
        else:
            upper_list[pos] = text.upper()

    packed = create_str_arr_from_list(upper_list)
    masked = str_arr_set_na_by_mask(packed, nan_mask)

    return pandas.Series(masked, self._data._index, name=self._data._name)
def impl(self, to_strip=None):
    """Build a Series by applying ``usecase(item, to_strip)`` to each element.

    ``usecase`` is a free variable supplied by the enclosing scope. Elements
    of ``self._data._data`` with zero length are stored unchanged without
    calling ``usecase``. The mask returned by ``get_nan_mask`` for the input
    values is applied to the new string array through
    ``str_arr_set_na_by_mask``.

    Parameters
    ----------
    to_strip : optional
        Forwarded as the second argument of ``usecase``; defaults to ``None``.

    Returns
    -------
    pandas.Series
        Result values with the index ``self._data._index`` and the name
        ``self._data._name``.
    """
    nan_mask = get_nan_mask(self._data._data)
    n_items = len(self._data)

    processed = [''] * n_items
    # plain sequential loop, as in the original implementation
    for pos in range(n_items):
        text = self._data._data[pos]
        if len(text) == 0:
            # empty strings are stored back as they are
            processed[pos] = text
        else:
            processed[pos] = usecase(text, to_strip)

    packed = create_str_arr_from_list(processed)
    masked = str_arr_set_na_by_mask(packed, nan_mask)

    return pandas.Series(masked, self._data._index, name=self._data._name)