def _getadvanced(self, index):
    """
    Advanced indexing (for sets, lists, or ndarrays).

    Takes one collection of indices per axis, pairs them up position by
    position, and selects the addressed elements from the underlying RDD.
    The leading ``self.split`` index arrays address record keys; the
    remaining ones address positions inside each record's value.

    Parameters
    ----------
    index : sequence of array-like
        One collection of indices per axis. Each is converted with
        ``asarray`` and all must end up with the same shape.

    Returns
    -------
    tuple
        ``(rdd, shape, split)``: the re-keyed RDD of selected elements,
        the common shape of the index arrays, and ``len(shape)``.

    Raises
    ------
    ValueError
        If the index arrays do not all share the same shape.
    """
    index = [asarray(i) for i in index]
    shape = index[0].shape
    if not all(i.shape == shape for i in index):
        # One "%s" per index array actually supplied, so the formatting can
        # never fail (and mask this error) when len(index) != self.ndim.
        raise ValueError(
            "shape mismatch: indexing arrays could not be broadcast "
            "together with shapes "
            + ("%s " * len(index)) % tuple(i.shape for i in index))

    # NOTE(review): listify is defined elsewhere; presumably it validates
    # each index array against the axis size and flattens it -- confirm.
    index = tuple(listify(i, dim) for (i, dim) in zip(index, self.shape))

    # build tuples with target indices
    key_tuples = list(zip(*index[0:self.split]))
    value_tuples = list(zip(*index[self.split:]))

    # Build a dictionary mapping each key target to ALL of its value targets.
    # Accumulating into real lists (rather than groupby + map) keeps targets
    # whose key recurs non-adjacently, and the lists can be read repeatedly.
    targets = {}
    for value, key in zip(value_tuples, key_tuples):
        targets.setdefault(key, []).append(value)

    # Hash-based membership instead of scanning the list once per record.
    wanted_keys = set(key_tuples)

    # filter records based on key targets
    filtered = self._rdd.filter(lambda kv: kv[0] in wanted_keys)

    # subselect and flatten records based on value targets (if they exist)
    if value_tuples:
        flattened = filtered.flatMap(
            lambda kv: [(kv[0], kv[1][i]) for i in targets[kv[0]]])
    else:
        flattened = filtered

    # reindex: number the selected elements in RDD order and convert that
    # running position into a key of the output shape
    indexed = flattened.zipWithIndex()
    rdd = indexed.map(
        lambda kkv: (unravel_index(kkv[1], shape), kkv[0][1]))
    split = len(shape)

    return rdd, shape, split
def _getadvanced(self, index):
    """
    Advanced indexing (for sets, lists, or ndarrays).

    Takes one collection of indices per axis, pairs them up position by
    position, and selects the addressed elements from the underlying RDD.
    The leading ``self.split`` index arrays address record keys; the
    remaining ones address positions inside each record's value.

    Parameters
    ----------
    index : sequence of array-like
        One collection of indices per axis. Each is converted with
        ``asarray`` and all must end up with the same shape.

    Returns
    -------
    tuple
        ``(rdd, shape, split)``: the re-keyed RDD of selected elements,
        the common shape of the index arrays, and ``len(shape)``.

    Raises
    ------
    ValueError
        If the index arrays do not all share the same shape.
    """
    index = [asarray(i) for i in index]
    shape = index[0].shape
    if not all(i.shape == shape for i in index):
        # One "%s" per index array actually supplied, so the formatting can
        # never fail (and mask this error) when len(index) != self.ndim.
        raise ValueError(
            "shape mismatch: indexing arrays could not be broadcast "
            "together with shapes "
            + ("%s " * len(index)) % tuple(i.shape for i in index))

    # NOTE(review): listify is defined elsewhere; presumably it validates
    # each index array against the axis size and flattens it -- confirm.
    index = tuple(listify(i, dim) for (i, dim) in zip(index, self.shape))

    # build tuples with target indices
    key_tuples = list(zip(*index[0:self.split]))
    value_tuples = list(zip(*index[self.split:]))

    # Build a dictionary mapping each key target to ALL of its value targets.
    # Accumulating into real lists (rather than groupby + map) keeps targets
    # whose key recurs non-adjacently, and the lists can be read repeatedly.
    targets = {}
    for value, key in zip(value_tuples, key_tuples):
        targets.setdefault(key, []).append(value)

    # Hash-based membership instead of scanning the list once per record.
    wanted_keys = set(key_tuples)

    # filter records based on key targets
    filtered = self._rdd.filter(lambda kv: kv[0] in wanted_keys)

    # subselect and flatten records based on value targets (if they exist)
    if value_tuples:
        flattened = filtered.flatMap(
            lambda kv: [(kv[0], kv[1][i]) for i in targets[kv[0]]])
    else:
        flattened = filtered

    # reindex: number the selected elements in RDD order and convert that
    # running position into a key of the output shape
    indexed = flattened.zipWithIndex()
    rdd = indexed.map(
        lambda kkv: (unravel_index(kkv[1], shape), kkv[0][1]))
    split = len(shape)

    return rdd, shape, split