Exemplo n.º 1
0
Arquivo: iloc.py Projeto: qinxuye/mars
def process_iloc_indexes(inp, indexes):
    """Validate and normalize positional (iloc) indexers for ``inp``.

    Missing trailing axes are padded with full slices; slice bounds are
    probed against the concrete pandas index, array-likes are converted
    to 1-d integer/boolean arrays, and scalar indexers are bounds-checked.

    Returns a list with one validated indexer per axis of ``inp``.
    Raises IndexingError when more indexers than axes are supplied.
    """
    ndim = inp.ndim

    if not isinstance(indexes, tuple):
        indexes = (indexes,)
    if len(indexes) < ndim:
        # pad the missing trailing axes with "select everything"
        indexes = indexes + (slice(None),) * (ndim - len(indexes))
    if len(indexes) > ndim:
        raise IndexingError('Too many indexers')

    processed = []
    for ax, index in enumerate(indexes):
        if isinstance(index, tuple):
            # a nested tuple is never a valid single-axis indexer
            raise IndexingError("Too many indexers")
        elif isinstance(index, slice):
            non_trivial = (index.start is not None or index.stop is not None
                           or index.step is not None)
            if non_trivial:
                # probe each bound against the real pandas index so that bad
                # indexer types fail early; out-of-bounds is fine for slices
                pd_index = (inp.index_value if ax == 0
                            else inp.columns_value).to_pandas()
                for val in (index.start, index.stop, index.step):
                    if val is None:
                        continue
                    try:
                        pd_index[val]
                    except IndexError:
                        pass
                    except TypeError:
                        raise TypeError(f'cannot do slice indexing on {type(pd_index)} '
                                        f'with these indexers [{val}] of {type(val)}')
            processed.append(index)
        elif isinstance(index, (list, np.ndarray, pd.Series, ENTITY_TYPE)):
            if isinstance(index, ENTITY_TYPE):
                index = asarray(index)
                if ax == 1:
                    # tensor indexers on the columns axis must already be
                    # executed, otherwise dtypes/columns_value are unknown
                    try:
                        index = index.fetch()
                    except (RuntimeError, ValueError):
                        raise NotImplementedError('indexer on axis columns cannot be '
                                                  'non-executed tensor')
            else:
                index = np.asarray(index)
            if index.dtype != np.bool_:
                # anything non-boolean is interpreted as integer positions
                index = index.astype(np.int64)
            if index.ndim != 1:
                raise ValueError('Buffer has wrong number of dimensions '
                                 f'(expected 1, got {index.ndim})')
            processed.append(index)
        elif isinstance(index, Integral):
            axis_size = inp.shape[ax]
            if not np.isnan(axis_size) and not -axis_size <= index < axis_size:
                raise IndexError('single positional indexer is out-of-bounds')
            processed.append(index)
        else:
            raise ValueError(_ILOC_ERROR_MSG)

    return processed
Exemplo n.º 2
0
def process_iloc_indexes(inp, indexes):
    """Validate and normalize positional (iloc) indexers against ``inp``.

    Parameters
    ----------
    inp
        Object being indexed; must expose ``ndim``, ``shape``,
        ``index_value`` and ``columns_value`` (a Mars dataframe/series —
        TODO confirm against callers).
    indexes
        A single indexer or a tuple with one indexer per axis.

    Returns
    -------
    list
        One validated indexer per axis of ``inp``.

    Raises
    ------
    IndexingError
        If more indexers than axes are given.
    """
    ndim = inp.ndim

    if not isinstance(indexes, tuple):
        indexes = (indexes,)
    # pad missing trailing axes with full slices
    if len(indexes) < ndim:
        indexes += (slice(None),) * (ndim - len(indexes))
    if len(indexes) > ndim:
        raise IndexingError('Too many indexers')

    new_indexes = []
    # check each index
    for ax, index in enumerate(indexes):
        if isinstance(index, tuple):
            # a tuple should already have been caught by this point
            # so don't treat a tuple as a valid indexer
            raise IndexingError("Too many indexers")
        elif isinstance(index, slice):
            # probe each slice bound against the concrete pandas index so
            # invalid indexer types fail early
            pd_index = (inp.index_value if ax == 0 else inp.columns_value).to_pandas()
            for val in [index.start, index.stop, index.step]:
                if val is not None:
                    try:
                        pd_index[val]  # check on the pandas
                    except IndexError:
                        # out-of-bounds is acceptable for slices, as in pandas
                        pass
                    except TypeError:
                        raise TypeError(
                            'cannot do slice indexing on {} '
                            'with these indexers [{}] '
                            'of {}'.format(type(pd_index), val, type(val)))
            new_indexes.append(index)
        elif isinstance(index, (list, np.ndarray, Base, Entity)):
            if not isinstance(index, (Base, Entity)):
                index = np.asarray(index)
            else:
                index = asarray(index)
                if ax == 1:
                    # do not support tensor index on axis 1
                    # because if so, the dtypes and columns_value would be unknown
                    try:
                        index = index.fetch()
                    except (RuntimeError, ValueError):
                        raise NotImplementedError('indexer on axis columns cannot be '
                                                  'non-executed tensor')
            # non-boolean indexers are treated as integer positions
            if index.dtype != np.bool_:
                index = index.astype(np.int64)
            if index.ndim != 1:
                raise ValueError('Buffer has wrong number of dimensions '
                                 '(expected 1, got {})'.format(index.ndim))
            new_indexes.append(index)
        elif isinstance(index, Integral):
            # NOTE(review): unlike a sibling version of this function, no
            # out-of-bounds check is performed on scalar indexers here
            new_indexes.append(index)
        else:
            raise ValueError(_ILOC_ERROR_MSG)

    return new_indexes
Exemplo n.º 3
0
def _parse_tuple(tup):
    """Unpack the user input for getitem and setitem and compute ndim

    loc[a] -> ([a], :), 1D
    loc[[a,b],] -> ([a,b], :),
    loc[a,b] -> ([a], [b]), 0D
    """
    if is_tuple(tup):
        if len(tup) > 2:
            raise IndexingError("Too many indexers")
        row_loc = tup[0]
        col_loc = tup[1] if len(tup) == 2 else slice(None)
    else:
        row_loc = tup
        col_loc = slice(None)

    ndim = _compute_ndim(row_loc, col_loc)
    row_scaler = is_scalar(row_loc)
    col_scaler = is_scalar(col_loc)
    if row_scaler:
        row_loc = [row_loc]
    if col_scaler:
        col_loc = [col_loc]

    return row_loc, col_loc, ndim, row_scaler, col_scaler
Exemplo n.º 4
0
def process_loc_indexes(inp, indexes):
    """Normalize label-based (loc) indexers for ``inp``.

    Pads missing trailing axes with full slices, rejects excess indexers,
    and converts array-like indexers to ndarray/tensor form.  Tensor
    indexers on the columns axis must already be executed.

    Returns a list with one indexer per axis of ``inp``.
    Raises IndexingError when more indexers than axes are supplied.
    """
    ndim = inp.ndim

    if not isinstance(indexes, tuple):
        indexes = (indexes,)
    if len(indexes) < ndim:
        # pad the missing trailing axes with "select everything"
        indexes = indexes + (slice(None),) * (ndim - len(indexes))
    if len(indexes) > ndim:
        raise IndexingError('Too many indexers')

    processed = []
    for ax, index in enumerate(indexes):
        if isinstance(index, (list, np.ndarray, pd.Series, ENTITY_TYPE)):
            if isinstance(index, ENTITY_TYPE):
                index = asarray(index)
                if ax == 1:
                    # tensor indexers on the columns axis must be executed,
                    # otherwise dtypes/columns_value would be unknown
                    try:
                        index = index.fetch()
                    except (RuntimeError, ValueError):
                        raise NotImplementedError(
                            'indexer on axis columns cannot be '
                            'non-executed tensor')
            else:
                index = np.asarray(index)
        processed.append(index)

    return processed
Exemplo n.º 5
0
 def __getitem__(self, key):
     """Retrieve the selection of ``self.df`` described by ``key``.

     Non-tuple keys are fast-tracked through ``DataFrame.__getitem__``;
     tuple keys are parsed into row/column locators and resolved
     positionally.  MultiIndex levels fully consumed by the lookup are
     dropped afterwards, mirroring pandas ``.loc`` semantics.
     """
     # When getting along a single axis,
     if not isinstance(key, tuple):
         # Try to fasttrack the code through already optimized path
         try:
             return self.df.__getitem__(key)
         # This can happen if it is a list of rows
         except KeyError:
             pass
     else:
         if len(key) > self.df.ndim:
             raise IndexingError("Too many indexers")
         # If we're only slicing columns, handle the case with `__getitem__`
         if isinstance(key[0], slice) and key[0] == slice(None):
             if not isinstance(key[1], slice):
                 # Boolean indexers can just be sliced into the columns object and
                 # then passed to `__getitem__`
                 if is_boolean_array(key[1]):
                     return self.df.__getitem__(self.df.columns[key[1]])
                 return self.df.__getitem__(key[1])
             else:
                 # label slice of columns: translate labels to positions,
                 # then select positionally via iloc
                 result_slice = self.df.columns.slice_locs(
                     key[1].start, key[1].stop)
                 return self.df.iloc[:, slice(*result_slice)]
     row_loc, col_loc, ndim, self.row_scaler, self.col_scaler = _parse_tuple(
         key)
     row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
     # Check that the row_lookup/col_lookup is longer than 1 or that the
     # row_loc/col_loc is not boolean list to determine the ndim of the
     # result properly for multiindex.
     ndim = (0 if len(row_lookup) == 1 and not is_boolean_array(row_loc)
             else 1) + (0 if len(col_lookup) == 1
                        and not is_boolean_array(col_loc) else 1)
     result = super(_LocIndexer, self).__getitem__(row_lookup, col_lookup,
                                                   ndim)
     # Pandas drops the levels that are in the `loc`, so we have to as well.
     if hasattr(result, "index") and isinstance(result.index,
                                                pandas.MultiIndex):
         # For a Series produced by selecting along columns, the consumed
         # levels are the column locators; otherwise they are row locators.
         if (isinstance(result, Series) and not isinstance(col_loc, slice)
                 and all(col_loc[i] in result.index.levels[i]
                         for i in range(len(col_loc)))):
             result.index = result.index.droplevel(list(range(
                 len(col_loc))))
         elif all(row_loc[i] in result.index.levels[i]
                  for i in range(len(row_loc))):
             result.index = result.index.droplevel(list(range(
                 len(row_loc))))
     if (hasattr(result, "columns")
             and isinstance(result.columns, pandas.MultiIndex)
             and all(col_loc[i] in result.columns.levels[i]
                     for i in range(len(col_loc)))):
         result.columns = result.columns.droplevel(list(range(
             len(col_loc))))
     return result
Exemplo n.º 6
0
    def sdc_reindex_series_impl(arr, index, name, by_index):
        """Reindex series data ``arr`` (aligned to ``index``) onto ``by_index``.

        NOTE(review): ``range_indexes``, ``int64_indexes``, ``data_is_str_arr``,
        ``data_dtype`` and ``index_dtype`` are captured from the enclosing
        (compile-time) scope — confirm against the factory building this impl.
        Raises ValueError on duplicate source labels and IndexingError when
        any label of ``by_index`` is missing from ``index``.
        """

        # no reindexing is needed if indexes are equal
        if range_indexes == True:  # noqa
            equal_indexes = numpy_like.array_equal(index, by_index)
        elif int64_indexes == True:  # noqa
            equal_indexes = numpy_like.array_equal(index, by_index)
        else:
            equal_indexes = False
        if (index is by_index or equal_indexes):
            return pandas.Series(data=arr, index=by_index, name=name)

        if data_is_str_arr == True:  # noqa
            # string data: gather values into a list and track NaNs separately
            _res_data = [''] * len(by_index)
            res_data_nan_mask = numpy.zeros(len(by_index), dtype=types.bool_)
        else:
            _res_data = numpy.empty(len(by_index), dtype=data_dtype)

        # build a dict of self.index values to their positions:
        map_index_to_position = Dict.empty(key_type=index_dtype,
                                           value_type=types.int32)

        for i, value in enumerate(index):
            if value in map_index_to_position:
                raise ValueError("cannot reindex from a duplicate axis")
            else:
                map_index_to_position[value] = i

        # gather values in parallel, counting target labels that are
        # missing from the source index
        index_mismatch = 0
        for i in numba.prange(len(by_index)):
            val = by_index[i]
            if val in map_index_to_position:
                pos_in_self = map_index_to_position[val]
                _res_data[i] = arr[pos_in_self]
                if data_is_str_arr == True:  # noqa
                    res_data_nan_mask[i] = isna(arr, i)
            else:
                index_mismatch += 1
        if index_mismatch:
            msg = "Unalignable boolean Series provided as indexer " + \
                  "(index of the boolean Series and of the indexed object do not match)."
            raise IndexingError(msg)

        if data_is_str_arr == True:  # noqa
            # materialize the string array and apply the NaN mask
            res_data = create_str_arr_from_list(_res_data)
            str_arr_set_na_by_mask(res_data, res_data_nan_mask)
        else:
            res_data = _res_data

        return pandas.Series(data=res_data, index=by_index, name=name)
Exemplo n.º 7
0
def _parse_tuple(tup):
    """
    Split a getitem/setitem key into row and column locators and compute ndim.

    loc[a] -> ([a], :), 1D
    loc[[a,b],] -> ([a,b], :),
    loc[a,b] -> ([a], [b]), 0D

    Parameters
    ----------
    tup : tuple
        User input to unpack.

    Returns
    -------
    row_loc : list
        Row locator(s); a scalar is wrapped in a list.
    col_loc : list
        Column locator(s); a scalar is wrapped in a list.
    ndim : {0, 1, 2}
        Number of dimensions of the located dataset.
    row_scaler : bool
        Whether the row locator was originally a scalar.
    col_scaler : bool
        Whether the column locator was originally a scalar.
    """
    if not is_tuple(tup):
        row_loc, col_loc = tup, slice(None)
    else:
        if len(tup) > 2:
            raise IndexingError("Too many indexers")
        row_loc = tup[0]
        col_loc = tup[1] if len(tup) == 2 else slice(None)

    ndim = _compute_ndim(row_loc, col_loc)
    row_scaler = is_scalar(row_loc)
    col_scaler = is_scalar(col_loc)
    if row_scaler:
        row_loc = [row_loc]
    if col_scaler:
        col_loc = [col_loc]

    return row_loc, col_loc, ndim, row_scaler, col_scaler
Exemplo n.º 8
0
 def __getitem__(self, key):
     """Return the selection of ``self.df`` described by ``key``.

     Non-tuple keys are fast-tracked through ``DataFrame.__getitem__``;
     tuple keys are parsed into locators and resolved positionally.
     MultiIndex levels fully consumed by the lookup are dropped,
     mirroring pandas ``.loc`` semantics.
     """
     # When getting along a single axis,
     if not isinstance(key, tuple):
         # Try to fasttrack the code through already optimized path
         try:
             return self.df.__getitem__(key)
         # This can happen if it is a list of rows
         except KeyError:
             pass
     else:
         if len(key) > self.df.ndim:
             raise IndexingError("Too many indexers")
         if key[0] == slice(None):
             # selecting all rows: delegate to plain column selection
             return self.df.__getitem__(key[1])
     row_loc, col_loc, ndim, self.row_scaler, self.col_scaler = _parse_tuple(
         key)
     # presumably supports loc-style enlargement of missing labels — confirm
     # against _handle_enlargement's definition
     self._handle_enlargement(row_loc, col_loc)
     row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc)
     # each single-element lookup collapses one dimension of the result
     ndim = (0 if len(row_lookup) == 1 else 1) + (0 if len(col_lookup) == 1
                                                  else 1)
     result = super(_LocIndexer, self).__getitem__(row_lookup, col_lookup,
                                                   ndim)
     # Pandas drops the levels that are in the `loc`, so we have to as well.
     if hasattr(result, "index") and isinstance(result.index,
                                                pandas.MultiIndex):
         # For a Series produced by selecting along columns, the consumed
         # levels are the column locators; otherwise they are row locators.
         if (isinstance(result, Series) and not isinstance(col_loc, slice)
                 and all(col_loc[i] in result.index.levels[i]
                         for i in range(len(col_loc)))):
             result.index = result.index.droplevel(list(range(
                 len(col_loc))))
         elif all(row_loc[i] in result.index.levels[i]
                  for i in range(len(row_loc))):
             result.index = result.index.droplevel(list(range(
                 len(row_loc))))
     if (hasattr(result, "columns")
             and isinstance(result.columns, pandas.MultiIndex)
             and all(col_loc[i] in result.columns.levels[i]
                     for i in range(len(col_loc)))):
         result.columns = result.columns.droplevel(list(range(
             len(col_loc))))
     return result
Exemplo n.º 9
0
    def sdc_reindex_series_impl(arr, index, name, by_index):
        """Reindex ``arr`` (currently aligned to ``index``) onto ``by_index``.

        Delegates to ``index.reindex`` for the positional mapping; raises
        IndexingError when any label of ``by_index`` is absent from ``index``.
        """

        _, new_order = index.reindex(by_index)
        if new_order is not None:
            # new_order maps each target position to a source position,
            # with -1 marking labels missing from the source index
            new_order_as_array = _nonoptional(new_order)
            index_mismatch = 0
            for i in numba.prange(len(by_index)):
                if new_order_as_array[i] == -1:
                    index_mismatch += 1

            if index_mismatch:
                # TO-DO: seems it covers only specific series reindex case, generalize?
                msg = "Unalignable boolean Series provided as indexer " + \
                      "(index of the boolean Series and of the indexed object do not match)."
                raise IndexingError(msg)

            res_data = numpy_like.take(arr, new_order_as_array)
        else:
            # indexes already aligned; reuse the data as-is
            res_data = arr

        return pandas.Series(data=res_data, index=by_index, name=name)
Exemplo n.º 10
0
def _parse_tuple(tup):
    """
    Unpack the user input for getitem and setitem and compute ndim.

    loc[a] -> ([a], :), 1D
    loc[[a,b],] -> ([a,b], :),
    loc[a,b] -> ([a], [b]), 0D

    Parameters
    ----------
    tup : tuple
        User input to unpack.

    Returns
    -------
    row_loc : list
        List of row locators; a scalar input is wrapped in a list.
    col_loc : list
        List of column locators; a scalar input is wrapped in a list.
    ndim : {0, 1, 2}
        Number of dimensions of located dataset.
    row_scaler : bool
        True if the row locator was originally a scalar, False otherwise.
    col_scaler : bool
        True if the column locator was originally a scalar, False otherwise.

    Raises
    ------
    IndexingError
        If ``tup`` contains more than two indexers.
    """
    row_loc, col_loc = slice(None), slice(None)

    if is_tuple(tup):
        row_loc = tup[0]
        if len(tup) == 2:
            col_loc = tup[1]
        if len(tup) > 2:
            raise IndexingError("Too many indexers")
    else:
        row_loc = tup

    ndim = _compute_ndim(row_loc, col_loc)
    row_scaler = is_scalar(row_loc)
    col_scaler = is_scalar(col_loc)
    row_loc = [row_loc] if row_scaler else row_loc
    col_loc = [col_loc] if col_scaler else col_loc

    return row_loc, col_loc, ndim, row_scaler, col_scaler
Exemplo n.º 11
0
    def _parse_row_and_column_locators(self, tup):
        """
        Split a getitem/setitem key into row and column locators.

        loc[a] -> ([a], :), 1D
        loc[[a,b]] -> ([a,b], :),
        loc[a,b] -> ([a], [b]), 0D

        Parameters
        ----------
        tup : tuple
            User input to unpack.

        Returns
        -------
        row_loc : scalar or list
            Row locator(s); callables are resolved against ``self.df``.
        col_loc : scalar or list
            Column locator(s); callables are resolved against ``self.df``.
        ndim : {0, 1, 2}
            Number of dimensions of located dataset.
        """
        if is_tuple(tup):
            if len(tup) > 2:
                raise IndexingError("Too many indexers")
            row_loc = tup[0]
            col_loc = tup[1] if len(tup) == 2 else slice(None)
        else:
            row_loc, col_loc = tup, slice(None)

        # locators may be callables that take the frame and return locators
        if callable(row_loc):
            row_loc = row_loc(self.df)
        if callable(col_loc):
            col_loc = col_loc(self.df)
        return row_loc, col_loc, _compute_ndim(row_loc, col_loc)