def _rmatvec(self, x):
    x = da.reshape(x, self.dims_fft)
    if self.chunks[1] is not None:
        x = x.rechunk(self.chunks[1])
    y = sqrt(np.prod(self.nffts)) * da.fft.ifft2(
        x, s=self.nffts, axes=(self.dirs[0], self.dirs[1]))
    y = da.take(y, np.arange(self.dims[self.dirs[0]]), axis=self.dirs[0])
    y = da.take(y, np.arange(self.dims[self.dirs[1]]), axis=self.dirs[1])
    y = y.ravel()
    return y

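# Hypothetical usage sketch (not from the original operator): after a padded
# inverse FFT, da.take crops each transformed axis back to its original size,
# which is the role of the two da.take calls above.
import numpy as np
import dask.array as da

y_pad = da.random.random((8, 8), chunks=4)   # padded to nffts = (8, 8)
dims = (5, 6)                                # original signal sizes
y_crop = da.take(y_pad, np.arange(dims[0]), axis=0)
y_crop = da.take(y_crop, np.arange(dims[1]), axis=1)
assert y_crop.shape == (5, 6)
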
def __getitem__(self, index):
    batch = self.labels.iloc[index * self.batch_size:(index + 1) * self.batch_size]
    optical_flow = da.take(self.optical_flow, batch['frame'].values, axis=0).compute()
    segments = da.take(self.segments, batch['frame'].values, axis=0).compute()
    frames = np.concatenate(
        [optical_flow, np.expand_dims(segments, 4)], axis=3)
    return frames, batch['speed'].values

def all(self):
    labels = self.labels.sort_values('frame')
    with dask.config.set(pool=ThreadPool(8)):
        print('load dask array...')
        optical_flow = da.take(self.optical_flow, labels['frame'].values, axis=0).compute()
        segments = da.take(self.segments, labels['frame'].values, axis=0).compute()
    frames = np.concatenate(
        [optical_flow, np.expand_dims(segments, 4)], axis=3)
    return frames, labels['speed'].values

def coarsen_destagger_dask(x, blocks, stagger=None, mode='wrap'):
    """
    Examples
    --------
    >>> x = da.arange(6, chunks=6)
    >>> xc = coarsen_destagger_dask(x, {0: 2}, stagger=0)
    >>> xc.compute()
    array([ 1. ,  3. ,  3.5])
    >>> x = da.from_array(x, chunks=x.shape)
    >>> xc = coarsen_destagger_dask(x, {0: 2}, stagger=0)
    >>> xc.compute()
    array([ 1. ,  3. ,  3.5])
    """
    output_numpy = False
    try:
        x._keys
    except AttributeError:
        output_numpy = True
        x = da.from_array(x, x.shape)

    xcoarse = coarsen_centered_np(x, blocks)
    # TODO refactor this code to another function
    if stagger is not None:
        blk = {key: val for key, val in blocks.items()
               if key != stagger}
        left_inds = np.arange(0, x.shape[stagger], blocks[stagger])
        left = da.coarsen(np.sum, da.take(x, left_inds, stagger), blk)
        n = left.shape[stagger]

        # handle boundary conditions
        if mode == 'wrap':
            bc = da.take(left, [0], axis=stagger)
        elif mode == 'clip':
            bc = da.take(left, [-1], axis=stagger)
        else:
            raise ValueError(f"Unknown boundary `mode` given: {mode}")

        right = da.take(left, np.arange(1, n), axis=stagger)
        right = da.concatenate((right, bc), axis=stagger)
        xcoarse = xcoarse + (right - left)/2

    n = np.prod(list(blocks.values()))
    ans = xcoarse/n

    if output_numpy:
        return ans.compute()
    else:
        return ans

def _rmatvec(self, x):
    if self.reshape:
        x = da.reshape(x, self.dims_fft)
    if self.chunks[1] is not None:
        x = x.rechunk(self.chunks[1])
    if not self.reshape:
        if self.real:
            y = sqrt(self.nfft) * da.fft.irfft(x, n=self.nfft, axis=-1)
            y = da.real(y)
        else:
            y = sqrt(self.nfft) * da.fft.ifft(x, n=self.nfft, axis=-1)
        if self.nfft != self.dims[self.dir]:
            y = y[:self.dims[self.dir]]
        if self.fftshift:
            y = da.fft.fftshift(y)
    else:
        if self.real:
            y = sqrt(self.nfft) * da.fft.irfft(
                x, n=self.nfft, axis=self.dir)
            y = da.real(y)
        else:
            y = sqrt(self.nfft) * da.fft.ifft(
                x, n=self.nfft, axis=self.dir)
        if self.nfft != self.dims[self.dir]:
            y = da.take(y, np.arange(0, self.dims[self.dir]), axis=self.dir)
        if self.fftshift:
            y = da.fft.fftshift(y, axes=self.dir)
    y = y.ravel()
    y = y.astype(self.dtype)
    return y

def test_take_dask_from_numpy():
    x = np.arange(5).astype('f8')
    y = da.from_array(np.array([1, 2, 3, 3, 2, 1]), chunks=3)
    z = da.take(x * 2, y)

    assert z.chunks == y.chunks
    assert eq(z, np.array([2., 4., 6., 6., 4., 2.]))

def test_take_dask_from_numpy():
    x = np.arange(5).astype('f8')
    y = da.from_array(np.array([1, 2, 3, 3, 2, 1]), chunks=3)
    z = da.take(x * 2, y)

    assert z.chunks == y.chunks
    assert_eq(z, np.array([2., 4., 6., 6., 4., 2.]))

def _matvec(self, x):
    if not self.inplace:
        x = x.copy()
    if not self.reshape:
        y = x[self.iava]
    else:
        x = da.reshape(x, self.dims)
        y = da.take(x, self.iava, axis=self.dir)
    return y

def predict(self, X):
    if len(self.estimators) == 1:
        preds = self.estimators[0].predict(X)
    else:
        preds = self.proba2predict(self.predict_proba(X))
    if self.task != 'regression' and self.classes_ is not None:
        preds = da.take(np.array(self.classes_), preds, axis=0)
    return preds

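# Hypothetical sketch (names below are made up) of the label-mapping step above:
# da.take maps integer class indices back to the original class labels, even
# when the indices live in a dask array.
import numpy as np
import dask.array as da

classes_ = ['cat', 'dog', 'fish']
pred_idx = da.from_array(np.array([2, 0, 1]), chunks=2)
labels = da.take(np.array(classes_), pred_idx, axis=0)
print(labels.compute())   # ['fish' 'cat' 'dog']
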
def _ifftshift(x, axes=None):
    """Similar to numpy.fft.ifftshift but based on dask.array"""
    if axes is None:
        axes = list(range(x.ndim))
    elif isinstance(axes, integer_types):
        axes = (axes,)
    for k in axes:
        n = x.shape[k]
        p2 = n - (n + 1) // 2
        mylist = np.concatenate((np.arange(p2, n), np.arange(p2)))
        x = da.take(x, mylist, k)
    return x

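# Hypothetical usage sketch: assuming the module-level imports (np, da) and an
# `integer_types` definition, the helper above should match numpy.fft.ifftshift.
import numpy as np
import dask.array as da

a = da.arange(10, chunks=3)
shifted = _ifftshift(a, axes=(0,))
assert np.array_equal(shifted.compute(), np.fft.ifftshift(np.arange(10)))
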
def _dask_oindex(x, indices):
    """Perform outer indexing on dask array `x`, one dimension at a time.

    It is assumed that `indices` is suitably normalised (no ellipsis, etc.)
    """
    axis = 0
    for index in indices:
        x = da.take(x, index, axis=axis)
        # If axis wasn't dropped by a scalar index:
        if not isinstance(index, Integral):
            axis += 1
    return x

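# Hypothetical usage sketch: outer indexing of a 2-D dask array with one index
# list per dimension, equivalent to numpy's np.ix_ (assumes `Integral` is
# imported from numbers in the surrounding module).
import numpy as np
import dask.array as da

x2 = da.from_array(np.arange(12).reshape(3, 4), chunks=2)
result = _dask_oindex(x2, ([0, 2], [1, 3]))
expected = np.arange(12).reshape(3, 4)[np.ix_([0, 2], [1, 3])]
assert np.array_equal(result.compute(), expected)
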
def dataset(self):
    with self._lock:
        if self._dataset is None:
            try:
                dataset = self._orig_dataset[self.keep]
            except NotImplementedError:
                # Dask does not like multiple boolean indices: go one dim at a time
                dataset = self._orig_dataset
                for dim, keep_per_dim in enumerate(self.keep):
                    dataset = da.take(dataset, keep_per_dim, axis=dim)
            for transform in self.transforms:
                dataset = transform(dataset)
            self._dataset = dataset
            self._orig_dataset = None
        return self._dataset

def test_take():
    x = np.arange(400).reshape((20, 20))
    a = da.from_array(x, chunks=(5, 5))

    assert_eq(np.take(x, 3, axis=0), da.take(a, 3, axis=0))
    assert_eq(np.take(x, [3, 4, 5], axis=-1), da.take(a, [3, 4, 5], axis=-1))

    with pytest.raises(ValueError):
        da.take(a, 3, axis=2)

    assert same_keys(da.take(a, [3, 4, 5], axis=-1),
                     da.take(a, [3, 4, 5], axis=-1))

def take(self, indices, axis=None):
    out = da.take(self, indices, axis=axis)
    if len(out.shape) == len(self.shape):
        out = view_subclass(out, type(self))
    return out

def take_array(arr, indices, axis=None):
    if DaskToolBox.exist_dask_object(arr, indices):
        return da.take(arr, indices=indices, axis=axis)
    else:
        return np.take(arr, indices=indices, axis=axis)

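# Hypothetical usage sketch (DaskToolBox.exist_dask_object is assumed to detect
# dask inputs): numpy arguments go through np.take, dask arguments through da.take.
import numpy as np
import dask.array as da

arr_np = np.arange(10)
arr_da = da.arange(10, chunks=5)
idx = np.array([1, 3])
assert np.array_equal(take_array(arr_np, idx, axis=0), arr_np[idx])
assert np.array_equal(take_array(arr_da, idx, axis=0).compute(), arr_np[idx])
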
def select_indices(a, indices, *, axis=0):
    a = ensure_dask_array(a)
    indices = ensure_dask_or_numpy_array(indices)
    return da.take(a, indices, axis=axis)

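# Hypothetical usage sketch (ensure_dask_array / ensure_dask_or_numpy_array are
# assumed to pass numpy inputs through as dask / numpy arrays): row selection.
import numpy as np

mat = np.arange(20).reshape(5, 4)
rows = np.array([0, 3])
sel = select_indices(mat, rows, axis=0)
assert np.array_equal(np.asarray(sel), mat[rows])
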
for cross_id in xids:
    print("processing", cross_id, "...")
    par_ix = xdf.query('cross == @cross_id').query(
        'role == "parent"').index.values
    pro_ix = xdf.query('cross == @cross_id').query(
        'role == "progeny"').index.values
    if par_ix.size != 2:
        print("Must be two parents: {0} found".format(par_ix.size))
        continue

    # grab genotypes of cross and AD
    pr_gt = x_gt.take(par_ix, axis=1)
    pg_gt = x_gt.take(pro_ix, axis=1)
    pr_ad = da.take(x_ad, par_ix, axis=1)

    # count hom refs and alts of parents
    hom_alt_sum = pr_gt.is_hom_alt().sum(axis=1).compute()
    hom_ref_sum = pr_gt.is_hom_ref().sum(axis=1).compute()

    # identify discordance
    mat_discords, mat_cov = compute_discords(pr_gt[:, 0], pr_ad[:, 0])
    pat_discords, pat_cov = compute_discords(pr_gt[:, 1], pr_ad[:, 1])

    # identify high coverage sites
    loc_high_cov = (mat_cov >= 30) & (pat_cov >= 30)
    loc_no_discords = (mat_discords + pat_discords) == 0
    loc_sufficient_calls = pg_gt.count_called(axis=1).compute() >= 10
    loc_all_filters = (loc_high_cov & loc_sufficient_calls) & loc_no_discords