def test_ScaledArray_array():
    """Check ScaledCenterArray against NumPy for all scale/center combinations.

    For several (N, P) shapes, builds a positive random matrix and verifies
    that the fitted array (and its transpose view) matches the reference
    NumPy computation for each of the four scale/center settings.
    """
    for N in range(3, 5):
        for P in range(3, 5):
            # +1 keeps entries positive so the column std is safely non-zero.
            array = np.random.rand(N, P) + 1
            std = np.diag(1 / np.std(array, axis=0))  # D = diag(1/std) per column
            mu = np.mean(array, axis=0)  # column means U
            # No scale, no center: fitted array must equal the input exactly.
            sarray = ScaledCenterArray(scale=False, center=False)
            sarray.fit(da.array(array))
            np.testing.assert_array_almost_equal(array, sarray.array)
            np.testing.assert_array_almost_equal(array.T, sarray.T.array)
            # With Scale but No Center
            # B = AD
            b_array = array.dot(std)
            sarray = ScaledCenterArray(scale=True, center=False)
            sarray.fit(da.array(array))
            np.testing.assert_array_almost_equal(b_array, sarray.array)
            np.testing.assert_array_almost_equal(b_array.T, sarray.T.array)
            # With Center but No Scale:
            # B = (A - U)
            b_array = array - mu
            sarray = ScaledCenterArray(scale=False, center=True)
            sarray.fit(da.array(array))
            np.testing.assert_array_almost_equal(b_array, sarray.array)
            np.testing.assert_array_almost_equal(b_array.T, sarray.T.array)
            # With Center and Scale:
            # (A - U)'D'D(A - U)x
            b_array = (array - mu).dot(std)
            sarray = ScaledCenterArray(scale=True, center=True)
            sarray.fit(da.array(array))
            np.testing.assert_array_almost_equal(b_array, sarray.array)
            np.testing.assert_array_almost_equal(b_array.T, sarray.T.array)
def test_PowerMethod_reset():
    """A PowerMethod instance must handle consecutive svd calls on arrays of different shapes."""
    for warm_start in (True, False):
        pm = PowerMethod(sub_svd_start=warm_start)
        for shape in ((100, 50), (110, 60)):
            u, s, v = pm.svd(da.array(np.random.randn(*shape)))
def prepare_dataset(X):
    """Reshape a flat image batch to (N, d, d[, 3]) and min-max scale to [0, 1].

    The side length ``d`` is inferred from the flattened feature count.
    Greyscale inputs are converted to RGB via ``grey2rgb``; rows shorter
    than d*d are zero-padded on the feature axis first.

    NOTE(review): this function uses ``da`` like NumPy (``da.float32``,
    eager ``map``) — confirm ``da`` is the intended alias here, since
    dask.array does not expose all of these names.
    """
    len_ = X.shape[0]
    shape_ = X.shape
    # Side length of the (assumed square) image.
    d = int(da.sqrt(X.flatten().reshape(X.shape[0], -1).shape[1]))
    if len(shape_) == 4:
        # Already channelled data: just normalise the spatial layout.
        X = da.reshape(X, [-1, d, d, 3])
    elif d == shape_[1] and len(shape_) == 3:
        # Square greyscale images: convert each to RGB.
        X = da.reshape(X, [-1, d, d])
        X = da.array(list(map(lambda x: grey2rgb(x), X)), dtype=da.float32)
    else:
        # Flat rows shorter than d*d: pad each ROW with zeros up to d*d.
        # BUG FIX: was da.vstack, which stacks extra rows and breaks the
        # subsequent reshape; hstack pads the feature axis as intended.
        r = d ** 2 - X.shape[1]
        train_padding = da.zeros((shape_[0], r))
        X = da.hstack([X, train_padding])
        X = da.reshape(X, [-1, d, d])
        X = da.array(list(map(lambda x: grey2rgb(x), X)), dtype=da.float32)
    print('Scaling dataset')
    # BUG FIX: the original tested the undefined name ``scalar`` (NameError
    # on every call). Reuse a previously fitted module-level ``scaler`` when
    # present; otherwise fit a fresh MinMaxScaler and publish it for reuse.
    scaler = globals().get('scaler')
    if scaler is not None:
        X = scaler.transform(X.flatten().reshape(-1, 1).astype(da.float32)).reshape(X.shape)
    else:
        scaler = MinMaxScaler()
        globals()['scaler'] = scaler
        X = scaler.fit_transform(X.flatten().reshape(-1, 1).astype(da.float32)).reshape(X.shape)
    return X
def main():
    """Run a random baseline and a 1-NN classifier on CIFAR-10, reporting accuracy and timing."""
    dataset_dir = "cifar-10-batches-py"
    # Time the data-loading step.
    t0 = time.time()
    train_images, train_labels = load_training_data(dataset_dir)
    test_images, test_labels = load_test_data(dataset_dir)
    t1 = time.time()
    print("Time taken for loading images = {}".format(t1 - t0))
    # Random-guess baseline over the 10k test images.
    random_prediction = random_classifier(10000)
    random_accuracy = classification_accuracy(random_prediction, test_labels)
    print("Random classifier accuracy = {}".format(random_accuracy))
    # ################ Naive implementation for 1NN classifier ########################
    t0 = time.time()
    k = 1
    test_images = da.array(test_images)
    train_images = da.array(train_images)
    train_labels = da.array(train_labels)
    prediction = nearest_neighbour(test_images, train_images, train_labels, k=k)
    accuracy = classification_accuracy(prediction, test_labels)
    t1 = time.time()
    print("{} nearest neighbor classifier accuracy = {}".format(k, accuracy))
    print("Time taken for classifying test images = {}".format(t1 - t0))
def workMethod():
    """Multiply two small dask matrices and print the operands and their product."""
    matrix1 = dar.array([[1, 2, 3, 4, 5, 6],
                         [7, 8, 9, 10, 11, 12],
                         [13, 14, 15, 16, 17, 18]])
    matrix2 = dar.array([[5, 10, 15],
                         [20, 25, 30],
                         [35, 40, 45],
                         [50, 55, 60],
                         [65, 70, 75],
                         [80, 85, 90]])
    # Expected Results:
    # [1155, 1260, 1365]
    # [2685, 2970, 3255]
    # [4215, 4680, 5145]
    for label, matrix in (('Matrix 1:', matrix1), ('Matrix 2:', matrix2)):
        print(label)
        print(matrix.compute())
        print('\n')
    result = dar.dot(matrix1, matrix2)
    # result.visualize(filename='./Results/DaskSyncMatrixMultFiles/DaskSyncMatrixMultGraph')
    print('Final Result')
    print(result.compute())
    print('\n')
def test_add_bands(self):
    """add_bands must expand L/LA/RGB data to the requested band set.

    Covers four cases: L->RGB, L->RGBA, LA->RGBA (alpha preserved and
    appended last), and RGB->RGBA. Each case checks the resulting mode
    string, the band coordinate, and the bands attribute.
    """
    from satpy.composites import add_bands
    import dask.array as da
    import numpy as np
    import xarray as xr
    # L + RGB -> RGB
    data = xr.DataArray(da.ones((1, 3, 3)), dims=('bands', 'y', 'x'),
                        coords={'bands': ['L']})
    new_bands = xr.DataArray(da.array(['R', 'G', 'B']), dims=('bands'),
                             coords={'bands': ['R', 'G', 'B']})
    res = add_bands(data, new_bands)
    res_bands = ['R', 'G', 'B']
    self.assertEqual(res.mode, ''.join(res_bands))
    np.testing.assert_array_equal(res.bands, res_bands)
    np.testing.assert_array_equal(res.coords['bands'], res_bands)
    # L + RGBA -> RGBA
    data = xr.DataArray(da.ones((1, 3, 3)), dims=('bands', 'y', 'x'),
                        coords={'bands': ['L']}, attrs={'mode': 'L'})
    new_bands = xr.DataArray(da.array(['R', 'G', 'B', 'A']), dims=('bands'),
                             coords={'bands': ['R', 'G', 'B', 'A']})
    res = add_bands(data, new_bands)
    res_bands = ['R', 'G', 'B', 'A']
    self.assertEqual(res.mode, ''.join(res_bands))
    np.testing.assert_array_equal(res.bands, res_bands)
    np.testing.assert_array_equal(res.coords['bands'], res_bands)
    # LA + RGB -> RGBA
    data = xr.DataArray(da.ones((2, 3, 3)), dims=('bands', 'y', 'x'),
                        coords={'bands': ['L', 'A']}, attrs={'mode': 'LA'})
    new_bands = xr.DataArray(da.array(['R', 'G', 'B']), dims=('bands'),
                             coords={'bands': ['R', 'G', 'B']})
    res = add_bands(data, new_bands)
    res_bands = ['R', 'G', 'B', 'A']
    self.assertEqual(res.mode, ''.join(res_bands))
    np.testing.assert_array_equal(res.bands, res_bands)
    np.testing.assert_array_equal(res.coords['bands'], res_bands)
    # RGB + RGBA -> RGBA
    data = xr.DataArray(da.ones((3, 3, 3)), dims=('bands', 'y', 'x'),
                        coords={'bands': ['R', 'G', 'B']},
                        attrs={'mode': 'RGB'})
    new_bands = xr.DataArray(da.array(['R', 'G', 'B', 'A']), dims=('bands'),
                             coords={'bands': ['R', 'G', 'B', 'A']})
    res = add_bands(data, new_bands)
    res_bands = ['R', 'G', 'B', 'A']
    self.assertEqual(res.mode, ''.join(res_bands))
    np.testing.assert_array_equal(res.bands, res_bands)
    np.testing.assert_array_equal(res.coords['bands'], res_bands)
def get_groups(model: "sbmtm", l: int = 0) -> Tuple[da.array, da.array]:
    """Extract group distributions from a fitted sbmtm topic model.

    Projects the model's block state at hierarchy level ``l`` and counts
    labeled half-edges of the document-word graph to build:

    * ``p_w_tw`` — rows normalised over words: P(word | word-group)
    * ``p_tw_d`` — columns normalised over groups: P(word-group | document)

    Both are returned as dask arrays whose blocks are densified COO data.
    """
    # rewrite from _sbmtm to use dask
    V = model.get_V()  # number of word nodes
    D = model.get_D()  # number of document nodes
    g = model.g
    state = model.state
    state_l = state.project_level(l).copy(overlap=True)
    state_l_edges = state_l.get_edge_blocks()  # labeled half-edges
    # count labeled half-edges, group-memberships
    B = state_l.get_B()
    id_dbw = np.zeros(g.edge_index_range, dtype=np.dtype(int))  # source (document) vertex per edge
    id_wb = np.zeros(g.edge_index_range, dtype=np.dtype(int))  # target word index (vertex id minus D)
    id_b = np.zeros(g.edge_index_range, dtype=np.dtype(int))  # block label of the word half-edge
    weig = np.zeros(g.edge_index_range, dtype=np.dtype(int))  # edge weight ("count" property)
    for i, e in enumerate(g.edges()):
        _, id_b[i] = state_l_edges[e]
        id_dbw[i] = int(e.source())
        id_wb[i] = int(e.target()) - D
        weig[i] = g.ep["count"][e]
    n_bw = sparse.COO(
        [id_b, id_wb], weig, shape=(B, V), fill_value=0
    )  # number of half-edges incident on word-node w and labeled as word-group tw
    del id_wb
    n_dbw = sparse.COO(
        [id_dbw, id_b], weig, shape=(D, B), fill_value=0
    )  # number of half-edges incident on document-node d and labeled as word-group td
    del weig
    del id_b
    del id_dbw
    # Drop word-groups with no incident half-edges before normalising,
    # so the divisions below never see a zero row/column sum.
    ind_w = np.where(np.sum(n_bw, axis=1) > 0)[0]
    n_bw = n_bw[ind_w, :]
    del ind_w
    ind_w2 = np.where(np.sum(n_dbw, axis=0) > 0)[0]
    n_dbw = n_dbw[:, ind_w2]
    del ind_w2
    # topic-distribution for words P(t_w | w)
    p_w_tw = n_bw / np.sum(n_bw, axis=1).todense()[:, np.newaxis]
    # Mixture of word-groups into documents P(d | t_w)
    p_tw_d = n_dbw / np.sum(n_dbw, axis=0).todense()[np.newaxis, :]
    return (
        da.array(p_w_tw).map_blocks(lambda b: b.todense(), dtype=np.dtype(float)),
        da.array(p_tw_d).map_blocks(lambda b: b.todense(), dtype=np.dtype(float)),
    )
def test_array():
    """da.array should honour ndmin/dtype and wrap an existing dask Array."""
    expected = np.array(np.ones(5, dtype="i4"), ndmin=3, dtype="i8")
    lazy = da.ones(5, chunks=3, dtype="i4")
    assert_eq(da.array(lazy, ndmin=3, dtype="i8"), expected)
    # regression #1847 this shall not raise an exception.
    wrapped = da.array(da.ones((100, 3), chunks=10))
    assert isinstance(wrapped, da.Array)
def test_array_id():
    """Transposing a fitted ScaledCenterArray must share (not copy) its internals."""
    base = da.array(np.random.rand(10, 7))
    seed = da.array(np.random.rand(10, 5))
    sarray = ScaledCenterArray(scale=True, center=True)
    sarray.fit(da.array(base), x=seed)
    transposed = sarray.T
    shared_pairs = (
        (sarray._array, transposed._array),
        (sarray.center_vector, transposed.center_vector),
        (sarray._array_moment.scale_matrix, transposed._array_moment.scale_matrix),
        (sarray._array_moment.sym_scale_matrix, transposed._array_moment.sym_scale_matrix),
    )
    for original, mirrored in shared_pairs:
        assert original is mirrored
def test_array():
    """Check da.array against np.array for ndmin/dtype handling."""
    dense = np.ones(5, dtype='i4')
    lazy = da.ones(5, chunks=3, dtype='i4')
    promoted = da.array(lazy, ndmin=3, dtype='i8')
    assert_eq(promoted, np.array(dense, ndmin=3, dtype='i8'))
    # regression #1847 this shall not raise an exception.
    source = da.ones((100, 3), chunks=10)
    assert isinstance(da.array(source), da.Array)
def test_array_tranpose_tranpose():
    """A double transpose must return the very same object with identical matmul results."""
    base = da.array(np.random.rand(7, 10))
    probe = da.array(np.random.rand(10, 5))
    sarray = ScaledCenterArray(scale=True, center=True)
    sarray.fit(da.array(base))
    round_trip = sarray.T.T
    # T.T is expected to be the identity: same wrapper, same backing array.
    assert sarray._array is round_trip._array
    assert sarray is round_trip
    np.testing.assert_array_equal(sarray.dot(probe), round_trip.dot(probe))
def test_lazy_nd_points_and_bounds(self):
    """Collapsing a coord with lazy points and bounds keeps both lazy with expected values."""
    self.setupTestArrays((3, 4))
    lazy_coord = AuxCoord(self.pts_lazy, bounds=self.bds_lazy)
    collapsed = lazy_coord.collapsed()
    for laziness_check in (collapsed.has_lazy_points, collapsed.has_lazy_bounds):
        self.assertTrue(laziness_check())
    self.assertArrayEqual(collapsed.points, da.array([55]))
    self.assertArrayEqual(collapsed.bounds, da.array([[-2, 112]]))
def run_whitening(with_dask):
    """Exercise the Whitening transformer with a NumPy or dask backend.

    Fits the transformer on a fixed 6x3 sample matrix, transforms a probe
    vector, and compares the learned mean, whitening matrix, and whitened
    sample against MATLAB-derived reference values, for both a plain fit
    and a fit whose return value is used as the transformer.
    """
    # CHECKING THE TYPES
    if with_dask:
        import dask.array as numerical_module
    else:
        import numpy as numerical_module
    # Tests our Whitening extractor.
    data = numerical_module.array([
        [1.2622, -1.6443, 0.1889],
        [0.4286, -0.8922, 1.3020],
        [-0.6613, 0.0430, 0.6377],
        [-0.8718, -0.4788, 0.3988],
        [-0.0098, -0.3121, -0.1807],
        [0.4301, 0.4886, -0.1456],
    ])
    sample = numerical_module.array([1, 2, 3.0])
    # Expected results (from matlab)
    mean_ref = numerical_module.array(
        [0.096324163333333, -0.465965438333333, 0.366839091666667])
    whit_ref = numerical_module.array([
        [1.608410253685985, 0, 0],
        [1.079813355720326, 1.411083365535711, 0],
        [0.693459921529905, 0.571417184139332, 1.800117179839927],
    ])
    sample_whitened_ref = numerical_module.array(
        [5.942255453628436, 4.984316201643742, 4.739998188373740])
    # Runs whitening (first method)
    t = Whitening()
    t.fit(data)
    s = t.transform(sample)
    # Makes sure results are good
    eps = 1e-4
    assert np.allclose(t.input_subtract, mean_ref, eps, eps)
    assert np.allclose(t.weights, whit_ref, eps, eps)
    assert np.allclose(s, sample_whitened_ref, eps, eps)
    # Runs whitening (second method: use fit's return value as the model)
    m2 = t.fit(data)
    s2 = t.transform(sample)
    # Makes sure results are good
    eps = 1e-4
    assert np.allclose(m2.input_subtract, mean_ref, eps, eps)
    assert np.allclose(m2.weights, whit_ref, eps, eps)
    assert np.allclose(s2, sample_whitened_ref, eps, eps)
def test_ScaledArray_sym_mat_mult():
    """sym_mat_mult must equal B.dot(B.T.dot(x))/f for every configuration.

    Sweeps shapes (N, P), the factor flag (None/'n'/'p'), probe widths K,
    squeezed/unsqueezed probes, and warm-start vectors, for all four
    scale/center combinations.
    """
    for N in range(2, 5):
        for P in range(2, 5):
            # +1 keeps entries positive so the column std is non-zero.
            array = np.random.rand(N, P) + 1
            std = np.diag(1/np.std(array, axis=0))
            mu = np.mean(array, axis=0)
            for factor in [None, 'n', 'p']:
                # f is the divisor implied by the `factor` flag.
                if factor is None:
                    f = 1
                elif factor == 'n':
                    f = N
                else:
                    f = P
                for K in range(1, 5):
                    for squeeze in [True, False]:
                        x = np.random.rand(N, K)
                        if squeeze:
                            # 1-D probe when K == 1; no-op otherwise.
                            x = np.squeeze(x)
                        for fit_x in [x, None]:
                            # With No Scale or Center
                            # x = A'Ax
                            result = array.dot(array.T.dot(x))/f
                            assert result.shape == x.shape
                            sarray = ScaledCenterArray(scale=False, center=False, factor=factor)
                            sarray.fit(da.array(array), x=fit_x)
                            np.testing.assert_array_equal(result, sarray.sym_mat_mult(x))
                            # With Scale but No Center
                            # B = AD
                            b_array = array.dot(std)
                            result = b_array.dot(b_array.T.dot(x))/f
                            assert result.shape == x.shape
                            sarray = ScaledCenterArray(scale=True, center=False, factor=factor)
                            sarray.fit(da.array(array), x=fit_x)
                            np.testing.assert_array_almost_equal(result, sarray.sym_mat_mult(x))
                            # With Center but No Scale:
                            # B = (A - U)
                            b_array = array - mu
                            result = b_array.dot(b_array.T.dot(x))/f
                            sarray = ScaledCenterArray(scale=False, center=True, factor=factor)
                            sarray.fit(da.array(array), x=fit_x)
                            np.testing.assert_array_almost_equal(result, sarray.sym_mat_mult(x))
                            # With Center and Scale:
                            # (A - U)'D'D(A - U)x
                            result = (array - mu).dot(std).dot(std).dot((array - mu).T.dot(x))/f
                            sarray = ScaledCenterArray(scale=True, center=True, factor=factor)
                            sarray.fit(da.array(array), x=fit_x)
                            np.testing.assert_array_almost_equal(result, sarray.sym_mat_mult(x))
def test_lazy_nd_points_and_bounds(self):
    """Collapsed coord built from lazy points/bounds stays lazy and has expected values."""
    import dask.array as da
    self.setupTestArrays((3, 4))
    source = AuxCoord(self.pts_lazy, bounds=self.bds_lazy)
    result = source.collapsed()
    self.assertTrue(result.has_lazy_points())
    self.assertTrue(result.has_lazy_bounds())
    self.assertArrayEqual(result.points, da.array([55]))
    self.assertArrayEqual(result.bounds, da.array([[-2, 112]]))
def run_wccn(with_dask):
    """Exercise the WCCN transformer with a NumPy or dask backend.

    Fits WCCN on a fixed 6x3 labelled sample matrix, transforms a probe
    vector, and compares the learned mean, weight matrix, and transformed
    sample against reference values, using both keyword and positional
    label passing.
    """
    # CHECKING THE TYPES
    if with_dask:
        import dask.array as numerical_module
    else:
        import numpy as numerical_module
    # Tests our Whitening extractor.
    X = numerical_module.array([
        [1.2622, -1.6443, 0.1889],
        [0.4286, -0.8922, 1.3020],
        [-0.6613, 0.0430, 0.6377],
        [-0.8718, -0.4788, 0.3988],
        [-0.0098, -0.3121, -0.1807],
        [0.4301, 0.4886, -0.1456],
    ])
    y = [0, 0, 1, 1, 2, 2]  # two samples per class
    sample = numerical_module.array([1, 2, 3.0])
    # Expected results
    mean_ref = numerical_module.array([0.0, 0.0, 0.0])
    weight_ref = numerical_module.array([
        [15.8455444, 0.0, 0.0],
        [-10.7946764, 2.87942129, 0.0],
        [18.76762201, -2.19719292, 2.1505817],
    ])
    sample_wccn_ref = numerical_module.array(
        [50.55905765, -0.83273618, 6.45174511])
    # Runs WCCN (first method)
    t = WCCN()
    t.fit(X, y=y)
    s = t.transform(sample)
    # Makes sure results are good
    eps = 1e-4
    assert np.allclose(t.input_subtract, mean_ref, eps, eps)
    assert np.allclose(t.weights, weight_ref, eps, eps)
    assert np.allclose(s, sample_wccn_ref, eps, eps)
    # Runs WCCN (second method: labels passed positionally)
    t.fit(X, y)
    s2 = t.transform(sample)
    # Makes sure results are good
    eps = 1e-4
    assert np.allclose(t.input_subtract, mean_ref, eps, eps)
    assert np.allclose(t.weights, weight_ref, eps, eps)
    assert np.allclose(s2, sample_wccn_ref, eps, eps)
def test_PowerMethod_nan_arrays():
    """PowerMethod.svd rejects NaN-contaminated input but accepts a masked copy."""
    data = np.random.randn(100, 100)
    for contaminant in [float('nan')]:
        data[0, 0] = contaminant
        for warm in [True, False]:
            method = PowerMethod(sub_svd_start=warm, max_iter=2)
            # Raw NaNs must surface as a linear-algebra failure.
            with pytest.raises(np.linalg.LinAlgError):
                _, _, _ = method.svd(da.array(data))
            # Masking the NaNs first must make the same data usable.
            cleaned = make_snp_array(da.array(data), mask_nan=True,
                                     std_method='norm', dtype='float64')
            _, _, _ = method.svd(cleaned)
def fit(self, X, y):
    """Fit a within-class-covariance whitening transform.

    Computes per-class means and the pooled within-class scatter matrix Sw,
    then stores the lower Cholesky factor of ``inv(Sw / n_classes)`` in
    ``self.weights`` (pseudo-inverse when ``self.pinv`` is truthy).

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data (NumPy array, or dask array when dask is installed).
    y : sequence
        Class label per row of X. Labels are mapped to mean-row indices
        explicitly, so they need not be contiguous integers.

    Returns
    -------
    self
    """
    # CHECKING THE TYPES: pick the dask or numpy/scipy backend. Import
    # dask lazily so NumPy-only environments still work.
    is_dask = False
    try:
        import dask.array as _dask_array
        is_dask = isinstance(X, _dask_array.Array)
    except ImportError:
        pass
    if is_dask:
        import dask.array as numerical_module
        from dask.array.linalg import cholesky, inv
    else:
        import numpy as numerical_module
        from scipy.linalg import cholesky, inv

    # Deterministic label order, and an explicit label -> row mapping.
    # BUG FIX: the original indexed mu_l by the raw label value while the
    # rows of mu_l followed set-iteration order, which silently broke for
    # non-contiguous or non-integer label sets.
    possible_labels = sorted(set(y))
    label_to_index = {label: i for i, label in enumerate(possible_labels)}
    y_ = numerical_module.array(y)
    n_classes = len(possible_labels)

    # 1. compute the means for each label
    mu_l = numerical_module.array(
        [
            numerical_module.mean(
                X[numerical_module.where(y_ == label)[0]], axis=0
            )
            for label in possible_labels
        ]
    )

    # 2. Compute the pooled within-class scatter matrix Sw
    Sw = numerical_module.zeros((X.shape[1], X.shape[1]), dtype=float)
    for label in possible_labels:
        indexes = numerical_module.where(y_ == label)[0]
        X_l_mu_l = X[indexes] - mu_l[label_to_index[label]]
        Sw += X_l_mu_l.T @ X_l_mu_l

    # 3. Invert the scaled scatter (pseudo-inverse when requested).
    # NOTE: `pinv` is resolved from module scope, as before.
    scaled_Sw = (1 / n_classes) * Sw
    inv_scaled_Sw = pinv(scaled_Sw) if self.pinv else inv(scaled_Sw)

    # 4. Lower Cholesky factor keeps parity with the previous implementation.
    self.weights = cholesky(inv_scaled_Sw, lower=True)
    self.input_subtract = 0
    self.input_divide = 1.0
    return self
def test_call_allele_frequencies__tetraploid(chunks):
    """Validate call_allele_frequency on tetraploid calls, optionally rechunked."""
    ds = call_allele_frequencies(
        get_dataset(
            [
                [[0, 1, 2, 2], [0, 0, 0, 0], [0, 0, 1, 2]],
                [[0, 0, 1, 0], [0, 2, 2, 2], [2, 1, 2, 1]],
                [[1, 1, -1, 2], [1, 1, 1, 1], [-1, -1, -1, -1]],
            ],
            n_ploidy=4,
            n_allele=3,
        )
    )
    if chunks is not None:
        # Re-chunk the genotype calls to exercise the chunked dask path.
        ds["call_genotype"] = (
            ds["call_genotype"].dims,
            da.array(ds["call_genotype"]).rechunk(chunks),
        )
    af = ds["call_allele_frequency"]
    # -1 marks a missing allele call: partially-missing calls renormalise
    # over observed alleles; fully-missing calls become NaN.
    np.testing.assert_equal(
        af,
        np.array(
            [
                [[0.25, 0.25, 0.5], [1.0, 0.0, 0.0], [0.5, 0.25, 0.25]],
                [[0.75, 0.25, 0.0], [0.25, 0.0, 0.75], [0.0, 0.5, 0.5]],
                [[0.0, 2 / 3, 1 / 3], [0.0, 1.0, 0.0], [np.nan, np.nan, np.nan]],
            ]
        ),
    )
def _initialization(self, data, **kwargs):
    """Build and persist the starting block of vectors for the power iteration.

    Converts ``data`` to a dask array (stored on ``self.array``), resolves
    the symbolic ``factor`` flag to a concrete dimension, and produces a
    starting matrix of ``k + buffer`` columns either from a sub-sampled SVD
    warm start or from random normals.

    Raises
    ------
    ValueError
        If ``k + buffer`` exceeds ``min(n, p)`` of the input.
    """
    vec_t = self.k + self.buffer
    if vec_t > min(data.shape):
        raise ValueError(
            'Cannot find more than min(n,p) singular values of array function.'
            'Currently k = {}, buffer = {}. k + b > min(n,p)'.format(
                self.k, self.buffer))
    self.array = da.array(data)
    # Resolve the symbolic scaling factor to a concrete dimension (or False).
    if self.factor == 'n':
        self.factor = self.array.shape[0]
    elif self.factor == 'p':
        self.factor = self.array.shape[1]
    elif self.factor is None:
        self.factor = False
    if self.sub_svd_start:
        # Warm start from an SVD of a sampled subset of rows.
        x = sub_svd_init(self.array,
                         k=vec_t,
                         warm_start_row_factor=self.init_row_sampling_factor,
                         log=0)
        if self.lmbd:
            # Blend the warm start with column-norm-scaled gaussian noise;
            # lmbd controls the mixing weight.
            c_norms = np.linalg.norm(x, 2, axis=0)
            x *= (1 - self.lmbd)
            x += (self.lmbd * c_norms / np.sqrt(x.shape[0])) * da.random.normal(size=x.shape)
    else:
        # Cold start: pure random normal block.
        x = rnormal_start(self.array, vec_t, log=0)
    return x.persist()
def test_call_allele_frequencies__diploid(chunks):
    """Validate call_allele_frequency on diploid calls, optionally rechunked."""
    ds = call_allele_frequencies(
        get_dataset(
            [
                [[0, 0], [0, 0], [0, 0]],
                [[0, 0], [0, 0], [0, 1]],
                [[1, 1], [0, 1], [1, 0]],
                [[1, -1], [1, 1], [-1, -1]],
            ]
        )
    )
    if chunks is not None:
        # Re-chunk the genotype calls to exercise the chunked dask path.
        ds["call_genotype"] = (
            ds["call_genotype"].dims,
            da.array(ds["call_genotype"]).rechunk(chunks),
        )
    af = ds["call_allele_frequency"]
    # -1 marks a missing allele call: half-missing calls renormalise over
    # the observed allele; fully-missing calls become NaN.
    np.testing.assert_equal(
        af,
        np.array(
            [
                [[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]],
                [[1.0, 0.0], [1.0, 0.0], [0.5, 0.5]],
                [[0.0, 1.0], [0.5, 0.5], [0.5, 0.5]],
                [[0.0, 1.0], [0.0, 1.0], [np.nan, np.nan]],
            ]
        ),
    )
def setUp(self):
    """
    Retrieves test data filepaths and auxiliary data and creates temporary
    reference data as NumPy arrays, xarray arrays and Pandas data frames.
    """
    self.gt_filepaths, self.timestamps = setup_gt_test_data()
    self.nc_filepaths, _ = setup_nc_multi_test_data()
    self.nc_filepath, _ = setup_nc_single_test_data()
    # Query point in geographic coordinates (EPSG:4326).
    self.lon = 5.
    self.lat = 44.
    sref = osr.SpatialReference()
    sref.ImportFromEPSG(4326)
    self.sref = sref
    # Pixel location of the query point and its projected coordinates.
    row = 970
    col = 246
    self.x = 4323250.
    self.y = 1314750.
    # Reference cube: one value per timestamp, derived from row + col.
    self.ref_np_ar = (np.array([[[row + col] * 4]]).T +
                      np.arange(0, 4)[:, None, None]).astype(float)
    xr_ar = xr.DataArray(data=da.array(
        self.ref_np_ar.astype(float)).rechunk((1, 1, 1)),
        coords={
            'time': self.timestamps,
            'y': [self.y],
            'x': [self.x]
        },
        dims=['time', 'y', 'x'])
    self.ref_xr_ds = xr.Dataset(data_vars={'1': xr_ar})
    self.ref_pd_df = self.ref_xr_ds.to_dataframe()
def _get_test_calib_for_channel_vis(self, chroot, meas):
    """Return fake calibration entries for a visible channel measurement."""
    make_array = xr.DataArray
    calib = {}
    # Earth-sun distance repeated per scan line.
    calib["state/celestial/earth_sun_distance"] = make_array(
        da.repeat(da.array([149597870.7]), 6000))
    calib[meas + "/channel_effective_solar_irradiance"] = make_array(50)
    return calib
def test_calc_obs_het(self):
    """calc_obs_het on unknown-shape dask variations, with depth thresholds."""
    variations = Variations(samples=da.array(['a', 'b', 'c', 'd']))
    gts = np.array([[[0, 0], [0, 1], [0, -1], [-1, -1]],
                    [[0, 0], [0, 0], [0, -1], [-1, -1]]])
    dps = np.array([[5, 12, 10, 10],
                    [10, 10, 10, 10]])
    variations[GT_FIELD] = da.from_array(gts)
    variations[DP_FIELD] = da.from_array(dps)
    # with this step we create a variation with dask arrays of unknown shapes
    variations = remove_low_call_rate_vars(variations, 0)[FLT_VARS]
    # No depth filter: heterozygosity over all called genotypes.
    het = calc_obs_het(variations, min_num_genotypes=0)
    self.assertTrue(np.allclose(het.compute(), [0.5, 0]))
    # het = calc_obs_het(variations, min_num_genotypes=10)
    # assert np.allclose(het, [np.NaN, np.NaN], equal_nan=True)
    # Minimum depth 10 excludes the depth-5 homozygous call.
    het = calc_obs_het(variations, min_num_genotypes=0,
                       min_call_dp_for_het_call=10)
    self.assertTrue(np.allclose(het.compute(), [1, 0]))
    # Maximum depth 11 excludes the depth-12 heterozygous call.
    het = calc_obs_het(variations, min_num_genotypes=0,
                       max_call_dp_for_het_call=11)
    self.assertTrue(np.allclose(het.compute(), [0, 0]))
    het = calc_obs_het(variations, min_num_genotypes=0,
                       min_call_dp_for_het_call=5)
    self.assertTrue(np.allclose(het.compute(), [0.5, 0]))
def test_calc_obs_het2(self):
    """calc_obs_het with generated sample names and varying depth thresholds."""
    gts = np.array([[[0, 0], [0, 1], [0, -1], [-1, -1]],
                    [[0, 0], [0, 0], [0, -1], [-1, -1]]])
    dps = np.array([[5, 12, 10, 10],
                    [10, 10, 10, 10]])
    samples = np.array([str(i) for i in range(gts.shape[1])])
    variations = Variations(samples=da.array(samples))
    variations[GT_FIELD] = da.from_array(gts)
    variations[DP_FIELD] = da.from_array(dps)
    # No depth filter: heterozygosity over all called genotypes.
    het = calc_obs_het(variations, min_num_genotypes=0)
    het = compute(het)
    assert np.allclose(het, [0.5, 0])
    # Requiring more genotypes than exist yields NaN per variation.
    # FIX: np.NaN was removed in NumPy 2.0; np.nan is the canonical spelling.
    het = calc_obs_het(variations, min_num_genotypes=10)
    het = compute(het)
    assert np.allclose(het, [np.nan, np.nan], equal_nan=True)
    # Minimum depth 10 excludes the depth-5 homozygous call.
    het = calc_obs_het(variations, min_num_genotypes=0,
                       min_call_dp_for_het_call=10)
    het = compute(het)
    assert np.allclose(het, [1, 0])
    # Maximum depth 11 excludes the depth-12 heterozygous call.
    het = calc_obs_het(variations, min_num_genotypes=0,
                       max_call_dp_for_het_call=11)
    het = compute(het)
    assert np.allclose(het, [0, 0])
    het = calc_obs_het(variations, min_num_genotypes=0,
                       min_call_dp_for_het_call=5)
    het = compute(het)
    assert np.allclose(het, [0.5, 0])
def test_PowerMethod_case1():
    """PowerMethod must recover the top-k SVD of a scaled, centered matrix.

    Compares singular values directly and singular vectors via subspace
    distance (vectors are only defined up to sign/rotation) against
    np.linalg.svd on the explicitly scaled/centered ground-truth matrix.
    """
    n = 100
    p = 80
    array = np.random.rand(100, 80)
    mu = array.mean(axis=0)
    std = np.diag(1 / array.std(axis=0))
    scaled_centered_array = (array - mu).dot(std)
    U, S, V = np.linalg.svd(scaled_centered_array, full_matrices=False)  # Ground Truth
    # make_snp_array applies the same mean/std treatment lazily.
    array = make_snp_array(da.array(array), mean=True, std=True,
                           std_method='norm', mask_nan=False, dtype='float64')
    for k in range(1, 10):
        U_k, S_k, V_k = U[:, :k], S[:k], V[:k, :]
        PM = PowerMethod(k=k, tol=1e-9, scoring_method='rmse', max_iter=100,
                         sub_svd_start=False, init_row_sampling_factor=1,
                         factor=None, lmbd=0)
        U_k_PM, S_k_PM, V_k_PM = PM.svd(array)
        np.testing.assert_array_almost_equal(S_k, S_k_PM)
        assert V_k.shape == V_k_PM.shape == (k, p)
        assert U_k.shape == U_k_PM.shape == (n, k)
        # Subspace distance tolerates sign/rotation ambiguity in the vectors.
        np.testing.assert_almost_equal(subspace_dist(V_k, V_k_PM, S_k_PM), 0)
        np.testing.assert_almost_equal(subspace_dist(U_k, U_k_PM, S_k_PM), 0)
def _compute_gt_graph(self) -> None:
    """Build the bipartite document/word graph-tool graph and save it.

    Vertices 0..n_doc-1 are documents (kind 0); the next n_word vertices
    are words (kind 1). Every nonzero entry of the count matrix becomes an
    edge weighted by its count. The graph is written to ``<name>.gt.gz``
    and registered under ``self.data['gt']``.
    """
    # FIX: removed the redundant double assignment ``path = path = ...``.
    path = self.path / (self.name + ".gt.gz")
    g = gt.Graph(directed=False)
    name = g.vp["name"] = g.new_vp("int")
    kind = g.vp["kind"] = g.new_vp("int")
    ecount = g.ep["count"] = g.new_ep("int")
    # defaultdicts lazily allocate a vertex the first time an id is seen.
    docs_add: defaultdict = defaultdict(lambda: g.add_vertex())
    words_add: defaultdict = defaultdict(lambda: g.add_vertex())
    # Materialise the count matrix as a single sparse COO array.
    count_matrix = (da.array(self.get_count_matrix()).map_blocks(
        lambda b: sparse.COO(b), dtype=np.dtype(int)).compute())
    n_doc, n_word = self.get_shape()
    # Document vertices are created first, so word vertex ids are offset
    # by n_doc when edges are added below.
    for i_d in range(n_doc):
        d = docs_add[i_d]
        name[d] = i_d
        kind[d] = 0
    for i_w in range(n_word):
        w = words_add[i_w]
        name[w] = i_w
        kind[w] = 1
    for i in range(count_matrix.nnz):
        i_d, i_w = count_matrix.coords[:, i]
        e = g.add_edge(i_d, n_doc + i_w)
        ecount[e] = count_matrix.data[i]
    g.save(str(path))
    self.data["gt"] = File(path)
def build(self, input_shape):
    """Create the fixed binomial blur kernel for MaxBlurPooling2D.

    Builds a normalised 3x3 or 5x5 binomial filter, tiles it across the
    channel axis of the input, and registers it as a non-trainable weight
    of shape (k, k, channels, 1) — the depthwise-conv kernel layout.

    Raises
    ------
    ValueError
        If ``self.kernel_size`` is neither 3 nor 5.
    """
    if self.kernel_size == 3:
        bk = np.array([[1, 2, 1],
                       [2, 4, 2],
                       [1, 2, 1]])
        bk = bk / np.sum(bk)
    elif self.kernel_size == 5:
        bk = np.array([[1, 4, 6, 4, 1],
                       [4, 16, 24, 16, 4],
                       [6, 24, 36, 24, 6],
                       [4, 16, 24, 16, 4],
                       [1, 4, 6, 4, 1]])
        bk = bk / np.sum(bk)
    else:
        # Only 3x3 and 5x5 kernels are supported.
        raise ValueError
    # Repeat each coefficient once per channel (flattened), then reshape to
    # (k, k, channels, 1); the consecutive repeats land on the channel axis.
    bk = np.repeat(bk, input_shape[3])
    bk = da.array(bk)
    bk = da.reshape(bk, (self.kernel_size, self.kernel_size, input_shape[3], 1))
    blur_init = tf.keras.initializers.constant(bk)
    # Frozen weight: the blur kernel is never updated during training.
    self.blur_kernel = self.add_weight(name='blur_kernel',
                                       shape=(self.kernel_size, self.kernel_size, input_shape[3], 1),
                                       initializer=blur_init,
                                       trainable=False)
    super(MaxBlurPooling2D, self).build(input_shape)
def process_data(X, y=None, test_size=0.20, dummies=False):
    """Split prepared image data into train/validation Dataset objects.

    Runs ``prepare_dataset`` on X, performs a flat train/test split with a
    fixed seed, restores the original sample shape, and attaches 10 random
    exemplars per label to the training Dataset as ``samples``.

    Parameters
    ----------
    X : array-like of images.
    y : optional labels; all-ones dummies are used when omitted.
    test_size : fraction held out for validation.
    dummies : when True, one-hot encode the labels.
    """
    if y is None:
        y = da.ones(X.shape[0])  # placeholder labels when none are given
    len_ = X.shape[0]
    X = prepare_dataset(X)
    if dummies:
        y = dd.get_dummies(y)  # one-hot encoding
    shape_ = list(X.shape[1:])
    # Split on flattened rows so the splitter sees a 2-D matrix;
    # fixed random_state keeps the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X.flatten().reshape(len_, -1), y,
        test_size=test_size, random_state=4891)
    X_train = X_train.reshape([X_train.shape[0]] + shape_)
    X_test = X_test.reshape([X_test.shape[0]] + shape_)
    print('Training dataset shape: ', X_train.shape)
    print('Validation dataset shape: ', X_test.shape)
    train_dataset = Dataset(X_train, y_train)
    test_dataset = Dataset(X_test, y_test)
    # Draw 10 random exemplars per unique label for later inspection.
    samples = list()
    for _ in range(10):
        for y_uniq in da.unique(train_dataset.labels):
            samples.append(train_dataset.x[train_dataset.labels == y_uniq][random.randint(0, len(train_dataset.x[train_dataset.labels == y_uniq]) - 1)])
    train_dataset.samples = da.array(samples)
    return train_dataset, test_dataset
def setUp(self, *mocks):
    """Create fake data for testing.

    Builds an AHIHSDFileHandler with hand-written header blocks holding
    both the default (block5) and updated (calibration) count-to-radiance
    coefficients, plus a small 2x2 dask array of raw counts.
    """
    # Gain/offset pairs: default, updated, and a deliberately invalid one.
    self.def_cali = [-0.0037, 15.20]
    self.upd_cali = [-0.0074, 30.40]
    self.bad_cali = [0.0, 0.0]
    fh = AHIHSDFileHandler(filetype_info={'file_type': 'hsd_b01'})
    fh.calib_mode = 'NOMINAL'
    fh.user_calibration = None
    fh.is_zipped = False
    fh._header = {
        'block5': {
            'band_number': [5],
            'gain_count2rad_conversion': [self.def_cali[0]],
            'offset_count2rad_conversion': [self.def_cali[1]],
            'central_wave_length': [10.4073],
        },
        'calibration': {
            'coeff_rad2albedo_conversion': [0.0019255],
            'speed_of_light': [299792458.0],
            'planck_constant': [6.62606957e-34],
            'boltzmann_constant': [1.3806488e-23],
            'c0_rad2tb_conversion': [-0.116127314574],
            'c1_rad2tb_conversion': [1.00099153832],
            'c2_rad2tb_conversion': [-1.76961091571e-06],
            'cali_gain_count2rad_conversion': [self.upd_cali[0]],
            'cali_offset_count2rad_conversion': [self.upd_cali[1]]
        },
    }
    # Raw sensor counts the calibration tests operate on.
    self.counts = da.array(np.array([[0., 1000.],
                                     [2000., 5000.]]))
    self.fh = fh
def test_PowerMethod_nan_arrays_fills():
    """NaNs filled via mean/median/constant must reproduce the filled-array SVD.

    For each fill strategy, the ground truth is np.linalg.svd on the array
    with the fill value substituted directly; PowerMethod.svd with
    mask_fill/mask_nan must match its truncated singular values.
    """
    array = np.random.randint(0, 3, size=(100, 100)).astype(float)
    array[0, 0] = 10000  # outlier so mean and median differ
    median = round(np.median(array))
    mean = round(np.mean(array))
    k = 10
    for method in ['mean', 'median', 10]:
        PM = PowerMethod(factor=None, scale=False, center=False,
                         tol=1e-16, lmbd=0)
        # Value the chosen strategy should place at the masked position.
        if method == 'mean':
            filled_value = mean
        elif method == 'median':
            filled_value = median
        else:
            filled_value = method
        # Ground truth: SVD of the array with the fill substituted directly.
        array[0, 1] = filled_value
        U, S, V = np.linalg.svd(array, full_matrices=False)
        U_k, S_k, V_k = svd_to_trunc_svd(U, S, V, k=k)
        array[0, 1] = float('nan')
        U, S, V = PM.svd(da.array(array), mask_fill=method, mask_nan=True)
        assert PM.array.array[0, 1] == filled_value
        np.testing.assert_array_almost_equal(S, S_k)
def _std_inverter(self, std):
    """
    Invert a vector of column standard deviations, guarding degenerate columns.

    Parameters
    ----------
    std : array_like, shape (P,)
        vector of standard deviations of the P rows of self._array

    Returns
    -------
    inv_std : array_like, shape (P,)
        vector of 1/std, as a dask array; columns whose std fell at or
        below ``self._std_tol`` have their std forced to 1 first (with an
        optional warning), so the division never blows up.
    """
    # Materialise lazily-computed stds; plain ndarrays lack .compute().
    try:
        std = std.compute()
    except AttributeError:
        pass
    degenerate_snp_columns = np.where(std <= self._std_tol)
    if len(degenerate_snp_columns[0]) > 0:
        if self._warn:
            warnings.warn('SNP Columns {} have low standard deviation.'
                          ' Setting STD of columns to 1'.format(
                              degenerate_snp_columns))
        # Neutralise degenerate columns instead of dividing by ~0.
        std[degenerate_snp_columns[0]] = 1
    return da.array(1 / std)
def test_lazy_nd_bounds(self):
    """Real points + lazy bounds: collapsing recomputes points lazily from the bounds."""
    import dask.array as da
    self.setupTestArrays((3, 4))
    source = AuxCoord(self.pts_real, bounds=self.bds_lazy)
    collapsed = source.collapsed()
    # Note that the new points get recalculated from the lazy bounds
    # and so end up as lazy
    self.assertTrue(collapsed.has_lazy_points())
    self.assertTrue(collapsed.has_lazy_bounds())
    self.assertArrayEqual(collapsed.points, np.array([55]))
    self.assertArrayEqual(collapsed.bounds, da.array([[-2, 112]]))
def _addtarr(t, dt):
    """Return a dask array holding every element of ``t`` shifted by ``dt``."""
    shifted = [value + dt for value in t]
    return darr.array(shifted)
def test_array():
    """da.array must mirror np.array's ndmin/dtype promotion."""
    reference = np.ones(5, dtype='i4')
    lazy = da.ones(5, chunks=3, dtype='i4')
    assert eq(da.array(lazy, ndmin=3, dtype='i8'),
              np.array(reference, ndmin=3, dtype='i8'))
def _alloc_hpr(ensblk, group, varname):
    """Collect a scaled heading/pitch/roll variable across an ensemble block.

    Skips non-dict entries in ``ensblk`` and returns the scaled values as a
    dask array.
    """
    phisc = 0.01  # Scale heading, pitch and roll by 0.01. Sentinel V manual, p. 259.
    # FIX: isinstance instead of `type(...) == dict` — idiomatic and also
    # accepts dict subclasses.
    return darr.array([ensarr[group][varname] * phisc
                       for ensarr in ensblk
                       if isinstance(ensarr, dict)])