Exemplo n.º 1
0
    def __getitem__(self, item):
        """
        Index the values and, when present, the labels of this object.

        Integer entries are passed through ``slicify`` before the values are
        indexed so that, per the original intent, no dimension is dropped.

        Parameters
        ----------
        item : int, slice, list, ndarray, or tuple of these
            Index specification. A bare int, list, or ndarray is wrapped in a
            one-element tuple before being applied to the values.

        Returns
        -------
        A new object built with ``self._constructor`` from the indexed values.
        When ``self.labels`` is not None, the result's ``labels`` are indexed
        with the first ``len(self.baseaxes)`` entries of a tuple index.
        """
        # handle values -- convert ints to slices so no dimensions are dropped
        if isinstance(item, int):
            item = (slicify(item, self.shape[0]),)
        if isinstance(item, tuple):
            item = tuple(slicify(i, n) if isinstance(i, int) else i
                         for i, n in zip(item, self.shape[:len(item)]))
        if isinstance(item, (list, ndarray)):
            item = (item,)
        new = self._values.__getitem__(item)
        result = self._constructor(new).__finalize__(self, noprop=('index', 'labels'))

        # handle labels
        if self.labels is not None:
            if isinstance(item, int):
                label_item = ([item],)
            elif isinstance(item, (list, ndarray, slice)):
                label_item = (item, )
            elif isinstance(item, tuple):
                label_item = item[:len(self.baseaxes)]
            newlabels = self.labels
            naxes = len(label_item)
            for (i, s) in enumerate(label_item):
                if isinstance(s, slice):
                    # The selector must be a tuple: NumPy no longer interprets
                    # a list of slices as a multidimensional index.
                    selector = tuple(s if j == i else slice(None) for j in range(naxes))
                    newlabels = newlabels[selector]
                else:
                    newlabels = newlabels.take(tupleize(s), i)
            result.labels = newlabels

        return result
Exemplo n.º 2
0
    def __getitem__(self, item):
        """
        Index the values and, when present, the labels of this object.

        Integer entries are passed through ``slicify`` before the values are
        indexed so that, per the original intent, no dimension is dropped.

        Parameters
        ----------
        item : int, slice, list, ndarray, or tuple of these
            Index specification. A bare int, list, or ndarray is wrapped in a
            one-element tuple before being applied to the values.

        Returns
        -------
        A new object built with ``self._constructor`` from the indexed values.
        When ``self.labels`` is not None, the result's ``labels`` are indexed
        with the first ``len(self.baseaxes)`` entries of a tuple index.
        """
        # handle values -- convert ints to slices so no dimensions are dropped
        if isinstance(item, int):
            item = (slicify(item, self.shape[0]),)
        if isinstance(item, tuple):
            item = tuple(slicify(i, n) if isinstance(i, int) else i
                         for i, n in zip(item, self.shape[:len(item)]))
        if isinstance(item, (list, ndarray)):
            item = (item,)
        new = self._values.__getitem__(item)
        result = self._constructor(new).__finalize__(self, noprop=('index', 'labels'))

        # handle labels
        if self.labels is not None:
            if isinstance(item, int):
                label_item = ([item],)
            elif isinstance(item, (list, ndarray, slice)):
                label_item = (item, )
            elif isinstance(item, tuple):
                label_item = item[:len(self.baseaxes)]
            newlabels = self.labels
            full = slice(None)
            for (axis, s) in enumerate(label_item):
                if isinstance(s, slice):
                    # Build the per-axis selector as a tuple; indexing an
                    # ndarray with a *list* of slices is rejected by modern
                    # NumPy instead of being treated as a multi-axis index.
                    selector = (full,) * axis + (s,) + (full,) * (len(label_item) - axis - 1)
                    newlabels = newlabels[selector]
                else:
                    newlabels = newlabels.take(tupleize(s), axis)
            result.labels = newlabels

        return result
Exemplo n.º 3
0
    def _getbasic(self, index):
        """
        Basic indexing (for slices or ints).

        Each index entry is normalised with ``slicify``; the first
        ``self.split`` slices are applied to the record keys and the remaining
        slices to the record values.

        Parameters
        ----------
        index : sequence of slices or ints
            One entry per axis, paired positionally with ``self.shape``.

        Returns
        -------
        tuple
            ``(rdd, shape, split)``: the filtered and re-keyed RDD, the shape
            implied by the slices, and the unchanged ``self.split``.
        """
        index = tuple([slicify(s, d) for (s, d) in zip(index, self.shape)])
        key_slices = index[0:self.split]
        value_slices = index[self.split:]

        def in_range(k, s):
            # A negative step walks from start down to (but excluding) stop,
            # matching the shape formula below.
            if s.step > 0:
                return s.start <= k < s.stop
            return s.stop < k <= s.start

        def key_check(key):
            # keep a record only if every key component is hit by its slice
            return all(in_range(k, s) and mod(k - s.start, s.step) == 0
                       for k, s in zip(key, key_slices))

        def key_func(key):
            # key_check guarantees divisibility, so floor division is exact and
            # keeps the new keys integral (true division gives floats on Py3)
            return tuple([(k - s.start) // s.step for k, s in zip(key, key_slices)])

        filtered = self._rdd.filter(lambda kv: key_check(kv[0]))
        rdd = filtered.map(lambda kv: (key_func(kv[0]), kv[1][value_slices]))
        shape = tuple([int(ceil((s.stop - s.start) / float(s.step))) for s in index])
        split = self.split
        return rdd, shape, split
Exemplo n.º 4
0
    def _getbasic(self, index):
        """
        Basic indexing (for slices or ints).

        Every entry of ``index`` is normalised with ``slicify`` against the
        matching entry of ``self.shape``. Slices for the first ``self.split``
        axes select and renumber record keys; the rest are applied to each
        record's value.

        Parameters
        ----------
        index : sequence of slices or ints
            One entry per axis.

        Returns
        -------
        tuple
            ``(rdd, shape, split)`` where ``split`` is ``self.split`` unchanged.
        """
        index = tuple([slicify(s, d) for (s, d) in zip(index, self.shape)])
        key_slices = index[0:self.split]
        value_slices = index[self.split:]

        def selected(k, s):
            # Bounds depend on direction: ascending slices cover [start, stop),
            # descending slices cover (stop, start]. This agrees with the
            # element count computed for ``shape`` below.
            if s.step > 0:
                inside = s.start <= k < s.stop
            else:
                inside = s.stop < k <= s.start
            return inside and mod(k - s.start, s.step) == 0

        def key_check(key):
            return all(selected(k, s) for k, s in zip(key, key_slices))

        def key_func(key):
            # Only keys that passed key_check reach here, so the offset is an
            # exact multiple of the step; ``//`` keeps the key an integer.
            return tuple([(k - s.start) // s.step for k, s in zip(key, key_slices)])

        filtered = self._rdd.filter(lambda kv: key_check(kv[0]))
        rdd = filtered.map(lambda kv: (key_func(kv[0]), kv[1][value_slices]))
        shape = tuple([int(ceil((s.stop - s.start) / float(s.step))) for s in index])
        split = self.split
        return rdd, shape, split
Exemplo n.º 5
0
    def __getitem__(self, index):
        """
        Get an item from the array through indexing.

        Supports basic indexing with slices and ints, or advanced
        indexing with lists, tuples, or ndarrays of integers.
        Mixing basic and advanced indexing across axes is currently supported
        only for a single advanced index amidst multiple basic indices.

        Parameters
        ----------
        index : tuple of slices, ints, list, tuple, or ndarrays
            One or more index specifications

        Returns
        -------
        BoltSparkArray
            With int-indexed axes squeezed out. When every axis is indexed by
            an int, the result of ``toarray()[()]`` on the squeezed array is
            returned instead.

        Raises
        ------
        ValueError
            If there are more indices than dimensions, an index has an
            unsupported type, a slice/int would produce an empty dimension, or
            an advanced index is empty or out of bounds.
        NotImplementedError
            If more than one (but not all) of the indices are advanced.
        """
        if isinstance(index, tuple):
            index = list(index)
        else:
            index = [index]
        # remember which axes were given as ints so they can be squeezed later
        int_locs = where([isinstance(i, int) for i in index])[0]

        if len(index) > self.ndim:
            raise ValueError("Too many indices for array")

        if not all(isinstance(i, (slice, int, list, tuple, ndarray))
                   for i in index):
            raise ValueError(
                "Each index must either be a slice, int, list, tuple, or ndarray"
            )

        # fill unspecified axes with full slices
        if len(index) < self.ndim:
            index += [slice(0, None, None)
                      for _ in range(self.ndim - len(index))]

        # standardize slices and bounds checking
        for n, idx in enumerate(index):
            size = self.shape[n]
            if isinstance(idx, (slice, int)):
                slc = slicify(idx, size)
                # throw an error if this would lead to an empty dimension in numpy
                if slc.step > 0:
                    minval, maxval = slc.start, slc.stop
                else:
                    minval, maxval = slc.stop, slc.start
                if minval > size - 1 or maxval < 1 or minval >= maxval:
                    raise ValueError(
                        "Index {} in dimension {} with shape {} would "
                        "produce an empty dimension".format(idx, n, size))
                index[n] = slc
            else:
                # array() copies, so the caller's index object is never mutated
                adjusted = array(idx)
                if adjusted.size == 0:
                    # min()/max() below would fail with an opaque reduction
                    # error; report the empty selection explicitly instead
                    raise ValueError(
                        "Index {} in dimension {} with shape {} would "
                        "produce an empty dimension".format(idx, n, size))
                inds = where(adjusted < 0)
                adjusted[inds] += size
                if adjusted.min() < 0 or adjusted.max() > size - 1:
                    raise ValueError(
                        "Index {} out of bounds in dimension {} with "
                        "shape {}".format(idx, n, size))
                index[n] = adjusted

        # select basic or advanced indexing
        n_advanced = sum(isinstance(i, (tuple, list, ndarray)) for i in index)
        if all(isinstance(i, slice) for i in index):
            rdd, shape, split = self._getbasic(index)
        elif n_advanced == len(index):
            rdd, shape, split = self._getadvanced(index)
        elif n_advanced == 1:
            rdd, shape, split = self._getmixed(index)
        else:
            raise NotImplementedError(
                "When mixing basic indexing (slices and int) "
                "with advanced indexing (lists, tuples, and ndarrays), "
                "can only have a single advanced index")

        # if any key indices used negative steps, records are no longer ordered
        reversed_keys = any(isinstance(s, slice) and s.step < 0
                            for s in index[:self.split])
        ordered = self._ordered is not False and not reversed_keys

        result = self._constructor(rdd,
                                   shape=shape,
                                   split=split,
                                   ordered=ordered).__finalize__(self)

        # squeeze out int dimensions (and squeeze to singletons if all ints)
        if len(int_locs) == self.ndim:
            return result.squeeze().toarray()[()]
        return result.squeeze(tuple(int_locs))
Exemplo n.º 6
0
    def __getitem__(self, index):
        """
        Read a region from the backend through basic indexing.

        Only basic indexing (slices and ints) over two or three axes is
        implemented. The first two axes are read with
        ``self._bckend.read_region`` and then subsampled by the slice steps; a
        third slice, if present, is applied to the last axis of the result.
        Ints are normalised with ``slicify`` and the corresponding axes are not
        squeezed from the result. Lists, tuples, and ndarrays are bounds-checked
        but then rejected with ``NotImplementedError``.

        Parameters
        ----------
        index : slice, int, or tuple of slices/ints
            One or more index specifications

        Returns
        -------
        The region returned by ``self._bckend.read_region`` after step
        subsampling (and the optional third-axis slice).

        Raises
        ------
        ValueError
            If there are more indices than dimensions, an index has an
            unsupported type, an index is empty or out of bounds, fewer than
            two axes are indexed, or one of the first two slices runs backwards.
        NotImplementedError
            If any index is a list, tuple, or ndarray, or the array has more
            than three axes.
        """
        if isinstance(index, tuple):
            index = list(index)
        else:
            index = [index]

        if len(index) > self.ndim:
            raise ValueError("Too many indices for array")

        if not all(isinstance(i, (slice, int, list, tuple, np.ndarray))
                   for i in index):
            raise ValueError(
                "Each index must either be a slice, int, list, tuple, or ndarray"
            )

        # fill unspecified axes with full slices
        if len(index) < self.ndim:
            index += [slice(0, None, None)
                      for _ in range(self.ndim - len(index))]

        # standardize slices and bounds checking
        for n, idx in enumerate(index):
            size = self.shape[n]
            if isinstance(idx, (slice, int)):
                slc = slicify(idx, size)
                # throw an error if this would lead to an empty dimension in numpy
                if slc.step > 0:
                    minval, maxval = slc.start, slc.stop
                else:
                    minval, maxval = slc.stop, slc.start
                if minval > size - 1 or maxval < 1 or minval >= maxval:
                    raise ValueError(
                        "Index {} in dimension {} with shape {} would "
                        "produce an empty dimension".format(idx, n, size))
                index[n] = slc
            else:
                # np.array copies, so the caller's index object is not mutated
                adjusted = np.array(idx)
                inds = np.where(adjusted < 0)
                adjusted[inds] += size
                if adjusted.min() < 0 or adjusted.max() > size - 1:
                    raise ValueError(
                        "Index {} out of bounds in dimension {} with "
                        "shape {}".format(idx, n, size))
                index[n] = adjusted

        # only basic indexing over at most three axes is implemented
        if not (all(isinstance(i, slice) for i in index) and len(index) <= 3):
            raise NotImplementedError(
                "Only basic indexing (slices and int) over at most three "
                "axes is supported")

        # Explicit raises instead of ``assert`` so validation survives ``-O``.
        if len(index) < 2:
            raise ValueError("Too short of an index")
        for slc in index[:2]:
            if slc.start > slc.stop:
                raise ValueError("Indexes cannot be backwards")

        out_arr = self._bckend.read_region(xstart=int(index[0].start),
                                           xend=int(index[0].stop),
                                           ystart=int(index[1].start),
                                           yend=int(index[1].stop))
        # the backend reads a dense region; apply the steps afterwards
        out_arr = out_arr[::index[0].step, ::index[1].step]
        return out_arr[:, :, index[2]] if len(index) == 3 else out_arr
Exemplo n.º 7
0
Arquivo: array.py Projeto: gdtm86/bolt
    def __getitem__(self, index):
        """
        Get an item from the array through indexing.

        Supports basic indexing with slices and ints, or advanced
        indexing with lists, tuples, or ndarrays of integers.
        Mixing basic and advanced indexing across axes is currently supported
        only for a single advanced index amidst multiple basic indices.

        Parameters
        ----------
        index : tuple of slices, ints, list, tuple, or ndarrays
            One or more index specifications

        Returns
        -------
        BoltSparkArray
            With int-indexed axes squeezed out. When every axis is indexed by
            an int, the result of ``toarray()[()]`` on the squeezed array is
            returned instead.

        Raises
        ------
        ValueError
            If there are more indices than dimensions, an index has an
            unsupported type, a slice/int would produce an empty dimension, or
            an advanced index is empty or out of bounds.
        NotImplementedError
            If more than one (but not all) of the indices are advanced.
        """
        advanced_types = (tuple, list, ndarray)

        if isinstance(index, tuple):
            index = list(index)
        else:
            index = [index]
        # positions given as ints; those axes are squeezed from the result
        int_locs = where([isinstance(i, int) for i in index])[0]

        if len(index) > self.ndim:
            raise ValueError("Too many indices for array")

        if not all(isinstance(i, (slice, int) + advanced_types) for i in index):
            raise ValueError("Each index must either be a slice, int, list, tuple, or ndarray")

        # fill unspecified axes with full slices
        if len(index) < self.ndim:
            index += [slice(0, None, None) for _ in range(self.ndim - len(index))]

        # standardize slices and bounds checking
        for n, idx in enumerate(index):
            size = self.shape[n]
            if isinstance(idx, (slice, int)):
                slc = slicify(idx, size)
                # throw an error if this would lead to an empty dimension in numpy
                if slc.step > 0:
                    minval, maxval = slc.start, slc.stop
                else:
                    minval, maxval = slc.stop, slc.start
                if minval > size-1 or maxval < 1 or minval >= maxval:
                    raise ValueError("Index {} in dimension {} with shape {} would "
                                     "produce an empty dimension".format(idx, n, size))
                index[n] = slc
            else:
                # array() copies, so the caller's index object is never mutated
                adjusted = array(idx)
                if adjusted.size == 0:
                    # avoid the opaque zero-size reduction error from min()/max()
                    raise ValueError("Index {} in dimension {} with shape {} would "
                                     "produce an empty dimension".format(idx, n, size))
                inds = where(adjusted < 0)
                adjusted[inds] += size
                if adjusted.min() < 0 or adjusted.max() > size-1:
                    raise ValueError("Index {} out of bounds in dimension {} with "
                                     "shape {}".format(idx, n, size))
                index[n] = adjusted

        # select basic or advanced indexing
        n_advanced = sum(isinstance(i, advanced_types) for i in index)
        if all(isinstance(i, slice) for i in index):
            rdd, shape, split = self._getbasic(index)
        elif n_advanced == len(index):
            rdd, shape, split = self._getadvanced(index)
        elif n_advanced == 1:
            rdd, shape, split = self._getmixed(index)
        else:
            raise NotImplementedError("When mixing basic indexing (slices and int) "
                                      "with advanced indexing (lists, tuples, and ndarrays), "
                                      "can only have a single advanced index")

        # if any key indices used negative steps, records are no longer ordered
        reversed_keys = any(isinstance(s, slice) and s.step < 0 for s in index[:self.split])
        ordered = self._ordered is not False and not reversed_keys

        result = self._constructor(rdd, shape=shape, split=split, ordered=ordered).__finalize__(self)

        # squeeze out int dimensions (and squeeze to singletons if all ints)
        if len(int_locs) == self.ndim:
            return result.squeeze().toarray()[()]
        return result.squeeze(tuple(int_locs))