def __getitem__(self, key):
    """
    Index the wrapped matrix, special-casing whole-row access.

    ``m[i]`` and ``m[i, :]`` with a non-negative integer ``i`` are served
    by copying the row straight out of the CSR buffers of ``self.mat``
    rather than delegating to the wrapped matrix's own indexing (the
    original note states that those operations are faulty in the
    scipy.sparse releases this was written against).

    Every other key is forwarded to ``self.mat``; a numeric result is
    returned unchanged and anything else is wrapped in ``SparseMatrix``.
    """
    def _copy_row(index):
        # Slice the three CSR arrays for this row and rebuild a 1 x ncols
        # matrix that owns its own copy of the data.
        wrapped = self.mat
        lo = wrapped.indptr[index]
        hi = wrapped.indptr[index + 1]
        single_row = csr_matrix(
            (wrapped.data[lo:hi], wrapped.indices[lo:hi], [0, hi - lo]),
            shape=(1, wrapped.shape[1]),
            copy=True,
        )
        return SparseMatrix(single_row)

    if isinstance(key, tuple):
        first, second = key[0], key[1]
        wants_whole_row = (
            isintlike(first)
            and first >= 0
            and isinstance(second, slice)
            and second == slice(None, None, None)
        )
        if wants_whole_row:
            return _copy_row(first)

    if isintlike(key) and key >= 0:
        return _copy_row(key)

    fetched = self.mat[key]
    return fetched if is_numeric(fetched) else SparseMatrix(fetched)
def __getitem__(self, key):
    """
    Return an element, a slice along the major axis, or a submatrix.

    ``key`` is either a ``(row, col)`` tuple or a single integer-like
    value, which is treated as ``self[key, :]``.

    Raises
    ------
    IndexError
        If ``key`` is neither a tuple nor integer-like.
    NotImplementedError
        If ``key`` is a tuple that matches none of the supported
        integer/slice combinations.
    """
    if not isinstance(key, tuple):
        if isintlike(key):
            return self[key, :]
        raise IndexError("invalid index")

    row = key[0]
    col = key[1]

    # TODO implement CSR[ [1,2,3], X ] with sparse matmat
    # TODO make use of sorted indices

    if isintlike(row) and isintlike(col):
        return self._get_single_element(row, col)

    # _swap reorders (row, col) into (major, minor) for this format.
    major, minor = self._swap((row, col))
    if isintlike(major) and isinstance(minor, slice):
        minor_extent = self._swap(self.shape)[1]
        start, stop, stride = minor.indices(minor_extent)
        out_shape = self._swap((1, stop - start))
        return self._get_slice(major, start, stop, stride, out_shape)

    if isinstance(row, slice) or isinstance(col, slice):
        return self._get_submatrix(row, col)

    raise NotImplementedError
def __init__(self, shape, dtype, dot=None, dot_adj=None):
    """
    Store the operator's shape, dtype and product callables.

    Parameters
    ----------
    shape : pair of integer-like values ``(m, n)``
    dtype : anything accepted by ``numpy.dtype``
    dot, dot_adj : optional
        Stored as ``_dot`` and ``_dot_adj``; at least one must be given.

    Raises
    ------
    ValueError
        If ``shape`` is not a pair of integer-like values, or if both
        ``dot`` and ``dot_adj`` are None.
    """
    shape_ok = (
        len(shape) == 2 and isintlike(shape[0]) and isintlike(shape[1])
    )
    if not shape_ok:
        raise ValueError('shape must be (m,n) with m and n integer')

    self.shape = shape
    self.dtype = numpy.dtype(dtype)  # defaults to float64

    # shape and dtype are already assigned by the time this check runs.
    if all(fn is None for fn in (dot, dot_adj)):
        raise ValueError('dot or dot_adj have to be defined')

    self._dot, self._dot_adj = dot, dot_adj
def get(self, key, default=0.):
    """This overrides the dict.get method, providing type checking
    but otherwise equivalent functionality.

    Parameters
    ----------
    key : pair of integer-like values ``(i, j)``
    default : value returned when ``key`` is not stored

    Raises
    ------
    IndexError
        If ``key`` is not a pair of integer-like values, or if either
        index lies outside ``0 <= i < shape[0]``, ``0 <= j < shape[1]``.
    """
    # The check is done with an explicit flag rather than ``assert`` so
    # that it is still enforced when Python runs with -O.
    try:
        i, j = key
        is_int_pair = isintlike(i) and isintlike(j)
    except (TypeError, ValueError):
        is_int_pair = False
    if not is_int_pair:
        raise IndexError('index must be a pair of integers')
    if (i < 0 or i >= self.shape[0] or j < 0 or j >= self.shape[1]):
        raise IndexError('index out of bounds')
    return ddict.get(self, key, default)
def __setitem__(self, index, x):
    """
    Assign ``x`` to the entries selected by ``index``.

    A pair of in-range, non-negative integer-like indices with a nonzero
    scalar value is stored directly.  Every other case goes through the
    general path: the index is expanded to index arrays, ``x`` is
    broadcast against them, negative indices are wrapped, and the values
    are written in bulk.  Entries that end up equal to zero are removed
    afterwards.

    Raises
    ------
    ValueError
        If ``x`` cannot be matched to the shape of the index arrays.
    IndexError
        If a row or column index is out of range.
    """
    if isinstance(index, tuple) and len(index) == 2:
        # Integer index fast path
        i, j = index
        if (isintlike(i) and isintlike(j) and 0 <= i < self.shape[0]
                and 0 <= j < self.shape[1]):
            v = np.asarray(x, dtype=self.dtype)
            if v.ndim == 0 and v != 0:
                ddict.__setitem__(self, (int(i), int(j)), v[()])
                return

    i, j = self._unpack_index(index)
    i, j = self._index_to_arrays(i, j)

    if isspmatrix(x):
        x = x.toarray()

    # Make x and i into the same shape
    x = np.asarray(x, dtype=self.dtype)
    x, _ = np.broadcast_arrays(x, i)

    if x.shape != i.shape:
        raise ValueError("shape mismatch in assignment")

    if np.size(x) == 0:
        return

    min_i = i.min()
    if min_i < -self.shape[0] or i.max() >= self.shape[0]:
        raise IndexError('index (%d) out of range -%d to %d)' %
                         (i.min(), self.shape[0], self.shape[0] - 1))
    if min_i < 0:
        i = i.copy()
        i[i < 0] += self.shape[0]

    min_j = j.min()
    # Column indices are bounded by the number of columns (shape[1]);
    # comparing against shape[0] mis-validates non-square matrices.
    if min_j < -self.shape[1] or j.max() >= self.shape[1]:
        raise IndexError('index (%d) out of range -%d to %d)' %
                         (j.min(), self.shape[1], self.shape[1] - 1))
    if min_j < 0:
        j = j.copy()
        j[j < 0] += self.shape[1]

    ddict.update(self, izip(izip(i.flat, j.flat), x.flat))

    if 0 in x:
        zeroes = x == 0
        for key in izip(i[zeroes].flat, j[zeroes].flat):
            if ddict.__getitem__(self, key) == 0:
                # may have been superseded by later update
                del self[key]
def test_isintlike(self):
    """Check ``sputils.isintlike`` against a table of inputs."""
    # (value, expected result), checked in this order.
    cases = [
        (3.0, True),
        (-4, True),
        (np.array(3), True),
        (np.array([3]), False),
        (2.5, False),
        (1 + 3j, False),
        ((1,), False),
        ((1, 2), False),
    ]
    for value, expected in cases:
        assert_equal(sputils.isintlike(value), expected)
def test_isintlike(self):
    """Check which values ``sputils.isintlike`` accepts and rejects."""
    accepted = (3.0, -4, np.array(3))
    rejected = (np.array([3]), 2.5, 1 + 3j, (1, ), (1, 2))

    for candidate in accepted:
        assert_equal(sputils.isintlike(candidate), True)
    for candidate in rejected:
        assert_equal(sputils.isintlike(candidate), False)
def __init__(self, A, p):
    """
    Validate ``A`` and ``p`` and record them in ``self.args``.

    Raises
    ------
    ValueError
        If ``A`` is not a ``LinearOperator``, if ``A`` is not square, or
        if ``p`` is not a non-negative integer-like value.
    """
    if not isinstance(A, LinearOperator):
        raise ValueError('LinearOperator expected as A')

    n_rows = A.shape[0]
    n_cols = A.shape[1]
    if n_rows != n_cols:
        raise ValueError('square LinearOperator expected, got %r' % A)

    valid_exponent = isintlike(p) and not p < 0
    if not valid_exponent:
        raise ValueError('non-negative integer expected as p')

    base = super(_PowerLinearOperator, self)
    base.__init__(_get_dtype([A]), A.shape)
    self.args = (A, p)
def __init__(self, A, p):
    """
    Validate ``A`` and ``p``, record them in ``self.args`` and
    initialise the base operator with ``A``'s shape and dtype.

    Raises
    ------
    ValueError
        If ``A`` is not a ``LinearOperator``, if ``A`` is not square, or
        if ``p`` is not a non-negative integer-like value.
    """
    if not isinstance(A, LinearOperator):
        raise ValueError('LinearOperator expected as A')
    if A.shape[0] != A.shape[1]:
        raise ValueError('square LinearOperator expected as A')
    # A negative exponent has no meaning for repeated application of A,
    # so reject it here instead of building an operator that misbehaves.
    if not isintlike(p) or p < 0:
        raise ValueError('non-negative integer expected as p')
    # args must be set before the base initialiser runs.
    # NOTE(review): presumably because _dot/_dot_adj read it -- confirm.
    self.args = (A, p)
    super(_PowerLinearOperator, self).__init__(A.shape, A.dtype,
                                               self._dot, self._dot_adj)
def __init__(self, A, p):
    """
    Validate ``A`` and ``p`` and record them in ``self.args``.

    Raises
    ------
    ValueError
        If ``A`` is not a ``LinearOperator``, if ``A`` is not square, or
        if ``p`` is not a non-negative integer-like value.
    """
    if not isinstance(A, LinearOperator):
        raise ValueError('LinearOperator expected as A')
    if A.shape[0] != A.shape[1]:
        raise ValueError('square LinearOperator expected, got %r' % A)
    # A negative exponent has no meaning for repeated application of A,
    # so reject it here instead of building an operator that misbehaves.
    if not isintlike(p) or p < 0:
        raise ValueError('non-negative integer expected as p')

    super(_PowerLinearOperator, self).__init__(_get_dtype([A]), A.shape)
    self.args = (A, p)
def test_isintlike(self):
    """Check ``sputils.isintlike`` on integer, array, float and tuple inputs."""
    exact_cases = (
        (-4, True),
        (np.array(3), True),
        (np.array([3]), False),
    )
    for value, expected in exact_cases:
        assert_equal(sputils.isintlike(value), expected)

    # An integral float is checked with the "inexact indices"
    # DeprecationWarning filtered out.
    with suppress_warnings() as sup:
        sup.filter(DeprecationWarning,
                   "Inexact indices into sparse matrices are deprecated")
        assert_equal(sputils.isintlike(3.0), True)

    for value in (2.5, 1 + 3j, (1,), (1, 2)):
        assert_equal(sputils.isintlike(value), False)
def __init__(self, A, p):
    """
    Validate ``A`` and ``p``, initialise the base operator with this
    object's ``matvec``/``rmatvec``/``matmat`` and record ``(A, p)`` in
    ``self.args``.

    Raises
    ------
    ValueError
        If ``A`` is not a ``LinearOperator``, if ``A`` is not square, or
        if ``p`` is not a non-negative integer-like value.
    """
    if not isinstance(A, LinearOperator):
        raise ValueError('LinearOperator expected as A')
    if A.shape[0] != A.shape[1]:
        raise ValueError('square LinearOperator expected as A')
    # A negative exponent has no meaning for repeated application of A,
    # so reject it here instead of building an operator that misbehaves.
    if not isintlike(p) or p < 0:
        raise ValueError('non-negative integer expected as p')

    super(_PowerLinearOperator, self).__init__(A.shape, self.matvec,
                                               self.rmatvec, self.matmat,
                                               _get_dtype([A]))
    self.args = (A, p)
def __getitem__(self, index):
    """
    Return the interactions selected by ``index`` as a pair.

    ``index`` must be a sequence or an integer-like value.  The result is
    ``(inputs, targets)``: ``inputs`` wraps the rows extracted from
    ``self.interactions_matrix``; ``targets`` wraps the rows extracted
    from ``self.target_interactions_matrix``, or is None when no target
    matrix is set.
    """
    assert sputils.issequence(index) or sputils.isintlike(index)

    # Flatten the index so a scalar and a sequence both become 1-D.
    user_ids = np.array(index).reshape(-1)
    selected = self._extract(self.interactions_matrix, index)

    has_targets = self.target_interactions_matrix is not None
    if has_targets:
        selected_targets = self._extract(
            self.target_interactions_matrix, index)

    inputs = UsersInteractions(users=user_ids,
                               interactions_matrix=selected)
    if not has_targets:
        return inputs, None

    targets = UsersInteractions(users=user_ids,
                                interactions_matrix=selected_targets)
    return inputs, targets
def _process_slice(sl, num):
    """
    Convert ``sl`` into a half-open ``(i0, i1)`` range over ``num`` items.

    Parameters
    ----------
    sl : None, slice or integer-like
        None selects the whole axis.  A slice must have step 1 and is
        clamped to ``[0, num]``; a reversed slice yields an empty range.
        An integer selects one item, with negative values counted from
        the end.
    num : int
        Length of the axis.

    Raises
    ------
    ValueError
        If a slice has a step other than 1.
    IndexError
        If an integer index is out of bounds.
    TypeError
        If ``sl`` is none of the accepted kinds.
    """
    if sl is None:
        i0, i1 = 0, num
    elif isinstance(sl, slice):
        i0, i1, stride = sl.indices(num)
        if stride != 1:
            raise ValueError('slicing with step != 1 not supported')
        i0 = min(i0, i1)  # give an empty slice when i0 > i1
    elif isintlike(sl):
        if sl < 0:
            # Rebind rather than ``+=``: a 0-d numpy array passes
            # isintlike, and in-place addition would modify the
            # caller's array.
            sl = sl + num
        i0, i1 = sl, sl + 1
        if i0 < 0 or i1 > num:
            raise IndexError('index out of bounds: 0 <= %d < %d <= %d' %
                             (i0, i1, num))
    else:
        raise TypeError('expected slice or scalar')

    return i0, i1
def process_slice(sl, num):
    """
    Convert ``sl`` into a half-open ``(i0, i1)`` range over ``num`` items.

    Parameters
    ----------
    sl : slice or integer-like
        A slice must have step 1 (or None) and is clamped to
        ``[0, num]``; a reversed slice yields an empty range.  An integer
        selects one item, with negative values counted from the end.
    num : int
        Length of the axis.

    Raises
    ------
    ValueError
        If a slice has a step other than 1.
    IndexError
        If an integer index is out of bounds.
    TypeError
        If ``sl`` is neither a slice nor integer-like.
    """
    if isinstance(sl, slice):
        if sl.step not in (1, None):
            raise ValueError('slicing with step != 1 not supported')
        # slice.indices resolves None and negative bounds and clamps
        # both ends to [0, num], so far-out-of-range bounds cannot leak
        # through as negative or oversized offsets.
        i0, i1, _ = sl.indices(num)
        return min(i0, i1), i1  # empty range when i0 > i1
    elif isintlike(sl):
        if sl < 0:
            # Rebind rather than ``+=`` so a 0-d numpy array passed by
            # the caller is not modified in place.
            sl = sl + num
        i0, i1 = sl, sl + 1
        if i0 < 0 or i1 > num:
            raise IndexError('index out of bounds: 0 <= %d < %d <= %d' %
                             (i0, i1, num))
        return i0, i1
    else:
        raise TypeError('expected slice or scalar')
def __getitem__(self, key):
    """
    Matrix slicing and element access operator

    For the most part, we can piggyback off the superclass implementation,
    but we need to implement additional logic to deal with the case where
    a delta vector's reference row is not in the slice.  In this case we
    must reconstruct the full row and store it directly.

    The result is a ``delta_csr_matrix`` built from the superclass
    result plus an adjustment matrix holding the reference rows that
    fell outside the selection.  An integer row combined with a slice
    column is answered by ``_get_row_slice`` instead.
    """
    row, col = self._unpack_index(key)

    # fast path for row optimized methods
    if isintlike(row):
        if isinstance(col, slice):
            return self._get_row_slice(row, col)

    # calling the superclass implementation will give us a CSR matrix in
    # which some of the rows might be delta encodings
    raw_matrix = super().__getitem__(key)

    # find out which rows from the matrix are included in the output
    # NOTE(review): from here on the raw ``key`` is indexed directly, so
    # this assumes ``key`` is a tuple whose first item is a slice or a
    # sequence of row indices -- confirm an integer row with a
    # non-slice column cannot reach this point.
    if type(key[0]) == slice:
        # NOTE(review): negative or out-of-range slice bounds are used
        # as given (not normalised against self.shape[0]) -- confirm.
        start = key[0].start if key[0].start is not None else 0
        stop = key[0].stop if key[0].stop is not None else self.shape[0]
        step = key[0].step if key[0].step is not None else 1
        out_rows = list(range(start, stop, step))
    else:
        # ``start`` is not read again in this method after this point.
        start = key[0][0]
        out_rows = key[0]

    # mapping from original row indices to slice row indices
    row_map = dict(zip(out_rows, range(len(out_rows))))

    # now correct each of the included rows
    # we will create a matrix of deltas that were not included in the slice,
    # and add to the raw matrix to get the corrected matrix
    d_data = np.array([], dtype=raw_matrix.dtype)
    d_indices = np.array([], dtype=raw_matrix.indices.dtype)
    d_indptr = [0]

    # the sliced matrix will need an adjusted deltas array
    new_deltas = np.arange(raw_matrix.shape[0])

    for i, row in enumerate(out_rows):
        if self.deltas[row] not in out_rows:
            # fetch the reference row
            ref = self.getrow(self.deltas[row])

            # add it to the adjustment matrix
            d_data = np.concatenate((d_data, ref.data))
            d_indices = np.concatenate((d_indices, ref.indices))
            d_indptr.append(d_indices.shape[0])
        else:
            # no adjustment needed: this row's adjustment entry is empty
            d_indptr.append(d_indptr[-1])
            # the delta is still valid, but the row index needs to be updated
            # NOTE(review): this writes into self.deltas (the matrix being
            # sliced), while new_deltas -- the array handed to the result --
            # is never modified after creation.  It also maps ``row`` rather
            # than ``self.deltas[row]``.  Looks unintended; confirm.
            self.deltas[i] = row_map[row]

    # build the adjustment matrix, which we will add to the raw matrix data
    # to get the correct output matrix
    adj_matrix = csr_matrix((d_data, d_indices, d_indptr),
                            shape=raw_matrix.shape, dtype=raw_matrix.dtype)

    # now add the adjustment matrix to the raw matrix data
    corrected_matrix = csr_matrix(
        (raw_matrix.data, raw_matrix.indices, raw_matrix.indptr),
        shape=raw_matrix.shape, dtype=raw_matrix.dtype) + adj_matrix

    return delta_csr_matrix(
        (corrected_matrix.data, corrected_matrix.indices,
         corrected_matrix.indptr, new_deltas),
        shape=corrected_matrix.shape, dtype=corrected_matrix.dtype)
def __getitem__(self, key):
    """
    Return the element, row/column selection or submatrix given by ``key``.

    Integer and unit-step slice combinations are dispatched to the
    dedicated helpers (``_get_single_element``, ``_get_row_slice``,
    ``_get_submatrix``).  A sequence on one axis is applied by
    multiplying with a selector matrix built by ``extractor``.  Anything
    left over is sampled element by element with ``csr_sample_values``.

    Raises
    ------
    IndexError
        If an index cannot be converted to an index array, is out of
        range, or the row and column index arrays differ in shape.
    """
    def asindices(x):
        """Convert ``x`` to an array with the narrowest index dtype."""
        try:
            x = np.asarray(x)

            # Check index contents, to avoid creating 64-bit arrays needlessly
            idx_dtype = get_index_dtype((x, ), check_contents=True)
            if idx_dtype != x.dtype:
                x = x.astype(idx_dtype)
        except Exception:
            # Only ordinary errors mean "bad index"; KeyboardInterrupt
            # and SystemExit must propagate instead of being reported
            # as an IndexError.
            raise IndexError('invalid index')
        else:
            return x

    def check_bounds(indices, N):
        """Return (min, max) of ``indices`` after range-checking against N."""
        if indices.size == 0:
            return (0, 0)

        max_indx = indices.max()
        if max_indx >= N:
            raise IndexError('index (%d) out of range' % max_indx)

        min_indx = indices.min()
        if min_indx < -N:
            raise IndexError('index (%d) out of range' % (N + min_indx))

        return (min_indx, max_indx)

    def extractor(indices, N):
        """Return a sparse matrix P so that P*self implements
        slicing of the form self[[1,2,3],:]
        """
        indices = asindices(indices)

        (min_indx, max_indx) = check_bounds(indices, N)

        if min_indx < 0:
            indices = indices.copy()
            indices[indices < 0] += N

        # One stored value of 1 per output row, at the selected column.
        # NOTE(review): ``da`` is presumably dask.array, chunked like
        # self -- confirm against the file's imports.
        indptr = da.arange(len(indices) + 1, dtype=indices.dtype,
                           chunks=self.chunks)
        data = da.ones(len(indices), dtype=self.dtype, chunks=self.chunks)
        shape = (len(indices), N)

        return csr_matrix((data, indices, indptr), shape=shape)

    row, col = self._unpack_index(key)

    # First attempt to use original row optimized methods
    # [1, ?]
    if isintlike(row):
        # [i, j]
        if isintlike(col):
            return self._get_single_element(row, col)
        # [i, 1:2]
        elif isinstance(col, slice):
            return self._get_row_slice(row, col)
        # [i, [1, 2]]
        elif issequence(col):
            P = extractor(col, self.shape[1]).T
            return self[row, :] * P
    elif isinstance(row, slice):
        # [1:2,??]
        if ((isintlike(col) and row.step in (1, None)) or
                (isinstance(col, slice) and
                 col.step in (1, None) and
                 row.step in (1, None))):
            # col is int or slice with step 1, row is slice with step 1.
            return self._get_submatrix(row, col)
        elif issequence(col):
            # row is slice, col is sequence.
            P = extractor(col, self.shape[1]).T  # [1:2,[1,2]]
            sliced = self
            if row != slice(None, None, None):
                sliced = sliced[row, :]
            return sliced * P

    elif issequence(row):
        # [[1,2],??]
        if isintlike(col) or isinstance(col, slice):
            P = extractor(row, self.shape[0])  # [[1,2],j] or [[1,2],1:2]
            extracted = P * self
            if col == slice(None, None, None):
                return extracted
            else:
                return extracted[:, col]

    if not (issequence(col) and issequence(row)):
        # Sample elementwise
        row, col = self._index_to_arrays(row, col)

    row = asindices(row)
    col = asindices(col)
    if row.shape != col.shape:
        raise IndexError('number of row and column indices differ')
    assert row.ndim <= 2

    num_samples = np.size(row)
    if num_samples == 0:
        return csr_matrix(np.atleast_2d(row).shape, dtype=self.dtype)
    check_bounds(row, self.shape[0])
    check_bounds(col, self.shape[1])

    # csr_sample_values fills ``val`` with the value at each (row, col).
    val = np.empty(num_samples, dtype=self.dtype)
    csr_sample_values(self.shape[0], self.shape[1],
                      self.indptr, self.indices, self.data,
                      num_samples, row.ravel(), col.ravel(), val)
    if row.ndim == 1:
        # row and col are 1d
        return np.asmatrix(val)
    return self.__class__(val.reshape(row.shape))
def __getitem__(self, index):
    """If key=(i,j) is a pair of integers, return the corresponding
    element.  If either i or j is a slice or sequence, return a new sparse
    matrix with just these elements.

    Raises
    ------
    IndexError
        If a row or column index is out of range.
    """
    i, j = self._unpack_index(index)

    i_intlike = isintlike(i)
    j_intlike = isintlike(j)

    if i_intlike and j_intlike:
        # Scalar index case
        i = int(i)
        j = int(j)
        if i < 0:
            i += self.shape[0]
        if i < 0 or i >= self.shape[0]:
            raise IndexError('index out of bounds')
        if j < 0:
            j += self.shape[1]
        if j < 0 or j >= self.shape[1]:
            raise IndexError('index out of bounds')
        return ddict.get(self, (i, j), 0.)
    elif ((i_intlike or isinstance(i, slice)) and
          (j_intlike or isinstance(j, slice))):
        # Fast path for slicing very sparse matrices
        i_slice = slice(i, i + 1) if i_intlike else i
        j_slice = slice(j, j + 1) if j_intlike else j
        i_indices = i_slice.indices(self.shape[0])
        j_indices = j_slice.indices(self.shape[1])
        i_seq = xrange(*i_indices)
        j_seq = xrange(*j_indices)
        newshape = (len(i_seq), len(j_seq))
        newsize = _prod(newshape)

        if len(self) < 2 * newsize and newsize != 0:
            # Switch to the fast path only when advantageous
            # (count the iterations in the loops, adjust for complexity)
            #
            # We also don't handle newsize == 0 here (if
            # i/j_intlike, it can mean index i or j was out of
            # bounds)
            return self._getitem_ranges(i_indices, j_indices, newshape)

    i, j = self._index_to_arrays(i, j)

    if i.size == 0:
        return dok_matrix(i.shape, dtype=self.dtype)

    min_i = i.min()
    if min_i < -self.shape[0] or i.max() >= self.shape[0]:
        raise IndexError('index (%d) out of range -%d to %d)' %
                         (i.min(), self.shape[0], self.shape[0] - 1))
    if min_i < 0:
        i = i.copy()
        i[i < 0] += self.shape[0]

    min_j = j.min()
    # Column indices are bounded by the number of columns (shape[1]);
    # comparing against shape[0] mis-validates non-square matrices.
    if min_j < -self.shape[1] or j.max() >= self.shape[1]:
        raise IndexError('index (%d) out of range -%d to %d)' %
                         (j.min(), self.shape[1], self.shape[1] - 1))
    if min_j < 0:
        j = j.copy()
        j[j < 0] += self.shape[1]

    newdok = dok_matrix(i.shape, dtype=self.dtype)

    # Copy only the nonzero entries so the result stays sparse.
    for a in xrange(i.shape[0]):
        for b in xrange(i.shape[1]):
            v = ddict.get(self, (i[a, b], j[a, b]), 0.)
            if v != 0:
                ddict.__setitem__(newdok, (a, b), v)

    return newdok