def indToSub(self, order='F', isOneBased=True, dims=None):
    """
    Convert linear indexing to subscript indexing

    Every record key is passed through a converter built from ``dims``,
    ``order`` and ``isOneBased``; record values are passed through untouched.

    Parameters
    ----------
    order : str, 'C' or 'F', default = 'F'
        Specifies row-major or column-major array indexing. See numpy.unravel_index.

    isOneBased : boolean, default = True
        True if generated subscript indices are to start at 1, False to start at 0

    dims : array-like, optional
        Maximum dimensions. If not provided, ``self.dims.max`` is used.

    Returns
    -------
    A new object created with ``self._constructor`` (same index as this one,
    finalized from ``self``) whose keys are the converted subscripts.
    """
    from thunder.rdds.keys import _indToSubConverter

    if dims is None:
        dims = self.dims.max

    converter = _indToSubConverter(dims, order=order, isOneBased=isOneBased)

    # Index into the (key, value) record instead of using a tuple-parameter
    # lambda: `lambda (k, v): ...` was removed in Python 3, while this form
    # is valid on both Python 2 and Python 3.
    rdd = self.rdd.map(lambda kv: (converter(kv[0]), kv[1]))
    return self._constructor(rdd, index=self._index).__finalize__(self)
def check_indtosubResult(indsubParam):
    """
    Assert that converting each linear index in ``indsubParam.indices``
    yields the matching entry of ``indsubParam.subscripts``.

    The converter is configured from the ``dims``, ``order`` and ``onebased``
    attributes of ``indsubParam``.
    """
    indices = indsubParam.indices
    toSubscript = _indToSubConverter(dims=indsubParam.dims,
                                     order=indsubParam.order,
                                     isOneBased=indsubParam.onebased)
    actual = [toSubscript(ind) for ind in indices]

    # Compare position by position so a failure names the offending index.
    for got, want, ind in zip(actual, indsubParam.subscripts, indices):
        message = 'Got subscript %s instead of %s for index:%d, dims:%s' % (got, want, ind, str(indsubParam.dims))
        assert_equals(want, got, message)
def query(self, inds, var='inds', order='F', isOneBased=True):
    """
    Extract records with indices matching those provided

    Keys are linearized (see subToInd) before being matched against the
    provided indices. For every group of indices, the values of the matching
    records are averaged, and the subscripts corresponding to the group's
    indices are averaged as well.

    Parameters
    ----------
    inds : str, or array-like (2D)
        Array of indices, each an array-like of integer indices, or
        filename of a MAT file containing a set of indices as a cell array

    var : str, optional, default = 'inds'
        Variable name if loading from a MAT file

    order : str, optional, default = 'F'
        Specify ordering for linearizing indices (see subToInd)

    isOneBased : boolean, optional, default = True
        Specify zero or one based indexing for linearizing (see subToInd)

    Returns
    -------
    keys : array, shape (n, k) where n is the number of index groups and
        k is the number of key dimensions
        Averaged keys

    values : array, shape (n, d) where d is the length of each value
        Averaged values

    Rows belonging to empty index groups are left as zeros in both arrays.
    """
    if isinstance(inds, str):
        inds = loadMatVar(inds, var)[0]
    else:
        inds = asarray(inds)

    n = len(inds)

    from thunder.rdds.keys import _indToSubConverter
    converter = _indToSubConverter(dims=self.dims.max, order=order, isOneBased=isOneBased)

    keys = zeros((n, len(self.dims.count)))
    # The first record's value determines the width of the averaged values.
    values = zeros((n, len(self.first()[1])))

    data = self.subToInd(order=order, isOneBased=isOneBased)

    for idx, indList in enumerate(inds):
        # Explicit length test: indList may be an array, whose truth value
        # is ambiguous.
        if len(indList) > 0:
            indsSet = set(asarray(indList).flat)
            bcInds = self.rdd.context.broadcast(indsSet)
            values[idx, :] = data.filterOnKeys(lambda k: k in bcInds.value).values().mean()
            # Build a real list: on Python 3 `map` is lazy and cannot be
            # averaged along an axis.
            keys[idx, :] = mean([converter(k) for k in indList], axis=0)

    return keys, values
def query(self, inds, var='inds', order='F', isOneBased=True):
    """
    Extract records with indices matching those provided

    Keys are linearized (see subToInd) before being matched against the
    provided indices. For every group of indices, the values of the matching
    records are averaged, and the subscripts corresponding to the group's
    indices are averaged as well.

    Parameters
    ----------
    inds : str, or array-like (2D)
        Array of indices, each an array-like of integer indices, or
        filename of a MAT file containing a set of indices as a cell array

    var : str, optional, default = 'inds'
        Variable name if loading from a MAT file

    order : str, optional, default = 'F'
        Specify ordering for linearizing indices (see subToInd)

    isOneBased : boolean, optional, default = True
        Specify zero or one based indexing for linearizing (see subToInd)

    Returns
    -------
    keys : array, shape (n, k) where n is the number of index groups and
        k is the number of key dimensions
        Averaged keys

    values : array, shape (n, d) where d is the length of each value
        Averaged values

    Rows belonging to empty index groups are left as zeros in both arrays.
    """
    if isinstance(inds, str):
        inds = loadMatVar(inds, var)[0]
    else:
        inds = asarray(inds)

    n = len(inds)

    from thunder.rdds.keys import _indToSubConverter
    converter = _indToSubConverter(dims=self.dims.max, order=order, isOneBased=isOneBased)

    keys = zeros((n, len(self.dims.count)))
    # The first record's value determines the width of the averaged values.
    values = zeros((n, len(self.first()[1])))

    data = self.subToInd(order=order, isOneBased=isOneBased)

    for idx, indList in enumerate(inds):
        # Explicit length test: indList may be an array, whose truth value
        # is ambiguous.
        if len(indList) > 0:
            indsSet = set(asarray(indList).flat)
            bcInds = self.rdd.context.broadcast(indsSet)
            values[idx, :] = data.filterOnKeys(lambda k: k in bcInds.value).values().mean()
            # Build a real list: on Python 3 `map` is lazy and cannot be
            # averaged along an axis.
            keys[idx, :] = mean([converter(k) for k in indList], axis=0)

    return keys, values
def test_ind_to_sub_array(self):
    # Convert the linear indices 1..12 for dims [2, 3, 2] with the converter's
    # default settings and compare against the full expected subscript table
    # (first subscript varies fastest, subscripts start at 1).
    inds = range(1, 13)
    converter = _indToSubConverter(dims=[2, 3, 2])

    # Materialize as a list: on Python 3 `map` returns a lazy iterator that
    # allclose cannot compare against an array.
    subs = [converter(ind) for ind in inds]

    expected = array([(1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1),
                      (1, 3, 1), (2, 3, 1), (1, 1, 2), (2, 1, 2),
                      (1, 2, 2), (2, 2, 2), (1, 3, 2), (2, 3, 2)])
    assert allclose(subs, expected)