def test_moveaxis_rollaxis_keyword():
    x = np.random.random((10, 12, 7))
    d = da.from_array(x, chunks=(4, 5, 2))
    assert_eq(np.moveaxis(x, destination=1, source=0),
              da.moveaxis(d, destination=1, source=0))
    assert_eq(np.rollaxis(x, 2), da.rollaxis(d, 2))
    assert isinstance(da.rollaxis(d, 1), da.Array)
    assert_eq(np.rollaxis(x, start=1, axis=2),
              da.rollaxis(d, start=1, axis=2))
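# Standalone sketch (not part of the test module above) contrasting the two
# keyword conventions the test exercises: moveaxis takes source/destination,
# rollaxis takes axis/start, and the two produce different orderings.
import numpy as np
import dask.array as da

x = np.zeros((10, 12, 7))
d = da.from_array(x, chunks=(4, 5, 2))

# moveaxis: move axis `source` so it ends up at position `destination`.
assert np.moveaxis(x, source=0, destination=1).shape == (12, 10, 7)
assert da.moveaxis(d, source=0, destination=1).shape == (12, 10, 7)

# rollaxis: roll axis `axis` backwards until it lies before position `start`.
assert np.rollaxis(x, axis=2, start=1).shape == (10, 7, 12)
assert da.rollaxis(d, axis=2, start=1).shape == (10, 7, 12)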
def _check_data_shape(data, input_idxs):
    """Check data shape and adjust if necessary."""
    # Handle multiple datasets
    if data.ndim > 2 and data.shape[0] * data.shape[1] == input_idxs.shape[0]:
        # Move the "channel" dimension first
        data = da.moveaxis(data, -1, 0)

    # Ensure two dimensions
    if data.ndim == 1:
        data = DataArray(da.map_blocks(np.expand_dims, data.data, 0, new_axis=[0]))

    return data
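# Usage sketch for _check_data_shape with hypothetical shapes (the helper
# above assumes module-level np, da and xarray.DataArray imports): a
# (y, x, bands) stack whose first two dimensions match the flattened input
# indices gets its channel dimension moved to the front.
import numpy as np
import dask.array as da

data = da.zeros((4, 5, 3), chunks=(2, 5, 3))      # (y, x, bands)
input_idxs = np.zeros((4 * 5,), dtype=np.int64)   # one index per pixel
out = _check_data_shape(data, input_idxs)
assert out.shape == (3, 4, 5)                     # (bands, y, x)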
def test_get_sample_from_bil_info(self):
    """Test bilinear interpolation as a whole."""
    import dask.array as da
    from xarray import DataArray
    from pyresample.bilinear.xarr import XArrayResamplerBilinear

    resampler = XArrayResamplerBilinear(self.source_def, self.target_def,
                                        self.radius)
    resampler.get_bil_info()

    # Sample from data1
    res = resampler.get_sample_from_bil_info(self.data1)
    res = res.compute()
    # Check a couple of values
    self.assertEqual(res.values[1, 1], 1.)
    self.assertTrue(np.isnan(res.values[0, 3]))
    # Check that the values haven't gone down or up a lot
    self.assertAlmostEqual(np.nanmin(res.values), 1.)
    self.assertAlmostEqual(np.nanmax(res.values), 1.)
    # Check that dimensions are the same
    self.assertEqual(res.dims, self.data1.dims)

    # Sample from data1, custom fill value
    res = resampler.get_sample_from_bil_info(self.data1, fill_value=-1.0)
    res = res.compute()
    self.assertEqual(np.nanmin(res.values), -1.)

    # Sample from integer data
    res = resampler.get_sample_from_bil_info(self.data1.astype(np.uint8),
                                             fill_value=None)
    res = res.compute()
    # Six values should be filled with zeros, which is the
    # default fill_value for integer data
    self.assertEqual(np.sum(res == 0), 6)

    # Output coordinates should have been set
    self.assertTrue(isinstance(resampler._out_coords, dict))
    self.assertTrue(
        np.all(resampler._out_coords['x'] == resampler.out_coords_x))
    self.assertTrue(
        np.all(resampler._out_coords['y'] == resampler.out_coords_y))

    # 3D data
    data = da.moveaxis(da.dstack((self.data1, self.data1)), -1, 0)
    data = DataArray(data, dims=('bands', 'y', 'x'))
    res = resampler.get_sample_from_bil_info(data)
    assert res.shape == (2, ) + self.target_def.shape
    assert res.dims == data.dims
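# Sketch (hypothetical 2D fields) of the 3D-data setup used at the end of the
# test above: stacking two (y, x) fields along a new last axis and then moving
# that axis to the front yields a (bands, y, x) cube.
import dask.array as da

field = da.zeros((4, 5), chunks=(4, 5))
cube = da.moveaxis(da.dstack((field, field)), -1, 0)
assert cube.shape == (2, 4, 5)   # (bands, y, x)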
def _graph_standard_grid(vis_dataset, cgk_1D, grid_parms):
    import dask
    import dask.array as da
    import xarray as xr
    import time
    import itertools

    # Getting data for gridding
    chan_chunk_size = vis_dataset[grid_parms["imaging_weight_name"]].chunks[2][0]
    freq_chan = da.from_array(vis_dataset.coords['chan'].values, chunks=(chan_chunk_size))

    n_chunks_in_each_dim = vis_dataset[grid_parms["imaging_weight_name"]].data.numblocks
    chunk_indx = []

    iter_chunks_indx = itertools.product(np.arange(n_chunks_in_each_dim[0]),
                                         np.arange(n_chunks_in_each_dim[1]),
                                         np.arange(n_chunks_in_each_dim[2]),
                                         np.arange(n_chunks_in_each_dim[3]))

    if grid_parms['chan_mode'] == 'continuum':
        n_chan_chunks_img = 1
        n_other_chunks = n_chunks_in_each_dim[0]*n_chunks_in_each_dim[1]*n_chunks_in_each_dim[2]*n_chunks_in_each_dim[3]
    elif grid_parms['chan_mode'] == 'cube':
        n_chan_chunks_img = n_chunks_in_each_dim[2]
        n_other_chunks = n_chunks_in_each_dim[0]*n_chunks_in_each_dim[1]*n_chunks_in_each_dim[3]

    #n_delayed = np.prod(n_chunks_in_each_dim)
    chunk_sizes = vis_dataset[grid_parms["imaging_weight_name"]].chunks

    list_of_grids = ndim_list((n_chan_chunks_img, n_other_chunks))
    list_of_sum_weights = ndim_list((n_chan_chunks_img, n_other_chunks))

    # Build graph
    for c_time, c_baseline, c_chan, c_pol in iter_chunks_indx:
        # There are two different wrapped gridder functions, _standard_grid_psf_numpy_wrap and _standard_grid_numpy_wrap.
        # This is done to simplify the psf and weight gridding graphs so that the vis_dataset is not loaded.
        if grid_parms['do_psf']:
            sub_grid_and_sum_weights = dask.delayed(_standard_grid_psf_numpy_wrap)(
                vis_dataset[grid_parms["uvw_name"]].data.partitions[c_time, c_baseline, 0],
                vis_dataset[grid_parms["imaging_weight_name"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
                freq_chan.partitions[c_chan],
                dask.delayed(cgk_1D),
                dask.delayed(grid_parms))
            grid_dtype = np.double
        else:
            sub_grid_and_sum_weights = dask.delayed(_standard_grid_numpy_wrap)(
                vis_dataset[grid_parms["data_name"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
                vis_dataset[grid_parms["uvw_name"]].data.partitions[c_time, c_baseline, 0],
                vis_dataset[grid_parms["imaging_weight_name"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
                freq_chan.partitions[c_chan],
                dask.delayed(cgk_1D),
                dask.delayed(grid_parms))
            grid_dtype = np.complex128

        if grid_parms['chan_mode'] == 'continuum':
            c_time_baseline_chan_pol = c_pol + c_chan*n_chunks_in_each_dim[3] + c_baseline*n_chunks_in_each_dim[3]*n_chunks_in_each_dim[2] + c_time*n_chunks_in_each_dim[3]*n_chunks_in_each_dim[2]*n_chunks_in_each_dim[1]
            list_of_grids[0][c_time_baseline_chan_pol] = da.from_delayed(
                sub_grid_and_sum_weights[0],
                (1, chunk_sizes[3][c_pol], grid_parms['imsize_padded'][0], grid_parms['imsize_padded'][1]),
                dtype=grid_dtype)
            list_of_sum_weights[0][c_time_baseline_chan_pol] = da.from_delayed(
                sub_grid_and_sum_weights[1],
                (1, chunk_sizes[3][c_pol]),
                dtype=np.float64)
        elif grid_parms['chan_mode'] == 'cube':
            c_time_baseline_pol = c_pol + c_baseline*n_chunks_in_each_dim[3] + c_time*n_chunks_in_each_dim[1]*n_chunks_in_each_dim[3]
            list_of_grids[c_chan][c_time_baseline_pol] = da.from_delayed(
                sub_grid_and_sum_weights[0],
                (chunk_sizes[2][c_chan], chunk_sizes[3][c_pol], grid_parms['imsize_padded'][0], grid_parms['imsize_padded'][1]),
                dtype=grid_dtype)
            list_of_sum_weights[c_chan][c_time_baseline_pol] = da.from_delayed(
                sub_grid_and_sum_weights[1],
                (chunk_sizes[2][c_chan], chunk_sizes[3][c_pol]),
                dtype=np.float64)

    # Sum grids
    for c_chan in range(n_chan_chunks_img):
        list_of_grids[c_chan] = _tree_sum_list(list_of_grids[c_chan])
        list_of_sum_weights[c_chan] = _tree_sum_list(list_of_sum_weights[c_chan])

    # Concatenate Cube
    if grid_parms['chan_mode'] == 'cube':
        list_of_grids_and_sum_weights = [da.concatenate(list_of_grids, axis=1)[0],
                                         da.concatenate(list_of_sum_weights, axis=1)[0]]
    else:
        list_of_grids_and_sum_weights = [list_of_grids[0][0], list_of_sum_weights[0][0]]

    # Put axes in image orientation. How much does this add to compute?
    list_of_grids_and_sum_weights[0] = da.moveaxis(list_of_grids_and_sum_weights[0], [0, 1], [-2, -1])
    list_of_grids_and_sum_weights[1] = da.moveaxis(list_of_grids_and_sum_weights[1], [0, 1], [-2, -1])

    return list_of_grids_and_sum_weights
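# Minimal sketch (hypothetical shapes) of the final reorientation above,
# assuming the gridded array has its channel and polarization axes leading and
# the two padded image axes trailing: da.moveaxis(grid, [0, 1], [-2, -1])
# moves the leading axes to the end, giving an image-ordered array lazily.
import dask.array as da

grid = da.zeros((8, 2, 256, 256), chunks=(1, 1, 256, 256))   # (chan, pol, u, v)
image_ordered = da.moveaxis(grid, [0, 1], [-2, -1])
assert image_ordered.shape == (256, 256, 8, 2)               # (u, v, chan, pol)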
def treeStructEval(pickledTrees, popDict, P, allSums):
    # Note: this function runs on a dask worker; names such as fullPopDict,
    # useGPU, cp, da, np, pickle and self.fErr are free variables expected to
    # be supplied by the enclosing scope/module.
    from dask.distributed import get_worker
    worker = get_worker()

    names = list(worker._structures.keys())

    allResults = []

    if not allSums:
        # Evaluate structures one at a time
        for struct in names:
            entry = worker._structures[struct]

            results = {}
            for svName in fullPopDict:
                results[svName] = {}

                for elem in fullPopDict[svName]:
                    pop = np.array(fullPopDict[svName][elem])

                    sve = entry[svName][elem]['energy']
                    svf = entry[svName][elem]['forces']

                    if useGPU:
                        pop = cp.asarray(pop)
                        sve = cp.asarray(sve)
                        svf = cp.asarray(svf)

                    eng = sve.dot(pop)
                    fcs = svf.dot(pop)

                    if useGPU:
                        eng = cp.asnumpy(eng)
                        fcs = cp.asnumpy(fcs)

                    Ne = eng.shape[0]
                    Nn = eng.shape[1] // P

                    eng = eng.reshape((Ne, Nn, P))
                    eng = np.moveaxis(eng, 1, 0)
                    eng = np.moveaxis(eng, -1, 1)

                    Na = fcs.shape[1]

                    fcs = fcs.reshape((Ne, Na, 3, Nn, P))
                    fcs = np.moveaxis(fcs, -2, 0)
                    fcs = np.moveaxis(fcs, -1, 1)

                    results[svName][elem] = {}
                    results[svName][elem]['energy'] = eng
                    results[svName][elem]['forces'] = fcs

            counters = {
                svName: {el: 0 for el in fullPopDict[svName]}
                for svName in fullPopDict
            }

            treeResults = []
            for tree in pickledTrees:
                tree = pickle.loads(tree)

                for elem in tree.chemistryTrees:
                    for svNode in tree.chemistryTrees[elem].svNodes:
                        svName = svNode.description

                        idx = counters[svName][elem]

                        eng = results[svName][elem]['energy'][idx]
                        fcs = results[svName][elem]['forces'][idx]

                        svNode.values = (eng, fcs)

                        counters[svName][elem] += 1

                trueForces = worker._true_forces[struct]

                engResult, fcsResult = tree.eval(useDask=False, allSums=allSums)

                Na = fcsResult[0].shape[1]

                fcsErrors = self.fErr(sum(fcsResult), trueForces)
                fcsErrors *= Na
                # fcsErrors = np.average(
                #     abs(sum(fcsResult) - trueForces), axis=(1,2)
                # )

                # Here: sum(engResult) = raw energies
                # Here: fcsErrors = weighted MAE or MSE

                treeResults.append([sum(engResult), fcsErrors])

            allResults.append(treeResults)
    else:
        # Structures can be stacked and evaluated all at once
        entries = [worker._structures[k] for k in names]

        # Evaluate each SV for all structures simultaneously
        results = {}
        for svName in fullPopDict:
            results[svName] = {}

            for elem in fullPopDict[svName]:
                # pop = cp.asarray(fullPopDict[svName][elem])
                pop = np.array(fullPopDict[svName][elem])

                if useGPU:
                    pop = cp.asarray(pop)

                results[svName][elem] = {}

                # bigSVE = np.array(database[svName][elem]['energy'])
                # bigSVF = np.array(database[svName][elem]['forces'])

                bigSVE = [e[svName][elem]['energy'] for e in entries]
                bigSVF = [e[svName][elem]['forces'] for e in entries]

                splits = np.cumsum([sve.shape[0] for sve in bigSVE])
                splits = np.concatenate([[0], splits])

                # useDask=True, allSums=True
                bigSVE = np.concatenate(bigSVE)
                bigSVF = np.concatenate(bigSVF)

                if useGPU:
                    bigSVE = cp.asarray(bigSVE)
                    bigSVF = cp.asarray(bigSVF)

                eng = bigSVE.dot(pop)
                fcs = bigSVF.dot(pop)

                if useGPU:
                    eng = cp.asnumpy(eng)
                    fcs = cp.asnumpy(fcs)

                Ne = eng.shape[0]
                Nn = eng.shape[1] // P

                eng = eng.reshape((Ne, Nn, P))
                eng = np.moveaxis(eng, 1, 0)
                eng = np.moveaxis(eng, -1, 1)

                Na = fcs.shape[0]

                fcs = fcs.reshape((Na, 3, Nn, P))
                fcs = da.moveaxis(fcs, -2, 0)
                fcs = da.moveaxis(fcs, -1, 1)

                perEntryEng = []
                perEntryFcs = []

                for idx in range(len(splits) - 1):
                    start = splits[idx]
                    stop = splits[idx + 1]

                    perEntryEng.append(eng[:, :, start:stop])
                    perEntryFcs.append(fcs[:, :, start:stop, :])

                results[svName][elem]['energy'] = perEntryEng
                results[svName][elem]['forces'] = perEntryFcs

                del bigSVE
                del bigSVF

                if useGPU:
                    cp._default_memory_pool.free_all_blocks()
                    cp._default_pinned_memory_pool.free_all_blocks()

        # Now parse the results
        # for entryNum, struct in enumerate(database.attrs['structNames']):
        for entryNum, struct in enumerate(names):
            counters = {
                svName: {el: 0 for el in fullPopDict[svName]}
                for svName in fullPopDict
            }

            treeResults = []
            for tree in pickledTrees:
                tree = pickle.loads(tree)

                for elem in tree.chemistryTrees:
                    for svNode in tree.chemistryTrees[elem].svNodes:
                        svName = svNode.description

                        idx = counters[svName][elem]

                        eng = results[svName][elem]['energy'][entryNum][idx]
                        fcs = results[svName][elem]['forces'][entryNum][idx]

                        svNode.values = (eng, fcs)

                        counters[svName][elem] += 1

                trueForces = worker._true_forces[struct]

                # engResult, fcsResult = tree.eval(useDask=False, allSums=allSums)
                # fcsErrors = sum(fcsResult)
                # fcsErrors = np.average(
                #     abs(sum(fcsResult) - trueForces), axis=(1,2)
                # )

                engResult, fcsResult = tree.eval(useDask=False, allSums=allSums)

                Na = fcsResult[0].shape[1]

                fcsErrors = self.fErr(sum(fcsResult), trueForces)
                fcsErrors *= Na

                treeResults.append([sum(engResult), fcsErrors])

            allResults.append(treeResults)

    allResults = np.array(allResults, dtype=np.float32)

    return allResults, names
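# Minimal sketch (hypothetical sizes; the axis interpretation is an assumption)
# of the reshape/moveaxis pattern used in treeStructEval: energies come back
# from the dot product as (Ne, Nn * P) and are rearranged to (Nn, P, Ne) so
# that the per-node results lead the array.
import numpy as np

Ne, Nn, P = 4, 3, 5                  # entries, nodes, population size (assumed)
eng = np.zeros((Ne, Nn * P))

eng = eng.reshape((Ne, Nn, P))
eng = np.moveaxis(eng, 1, 0)         # (Nn, Ne, P)
eng = np.moveaxis(eng, -1, 1)        # (Nn, P, Ne)
assert eng.shape == (Nn, P, Ne)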
def _graph_aperture_grid(vis_dataset, gcf_dataset, grid_parms, sel_parms):
    import dask
    import dask.array as da
    import xarray as xr
    import time
    import itertools
    import matplotlib.pyplot as plt

    # Getting data for gridding
    chan_chunk_size = vis_dataset[sel_parms["imaging_weight"]].chunks[2][0]
    freq_chan = da.from_array(vis_dataset.coords['chan'].values, chunks=(chan_chunk_size))

    n_chunks_in_each_dim = vis_dataset[sel_parms["imaging_weight"]].data.numblocks
    chunk_indx = []

    iter_chunks_indx = itertools.product(np.arange(n_chunks_in_each_dim[0]),
                                         np.arange(n_chunks_in_each_dim[1]),
                                         np.arange(n_chunks_in_each_dim[2]),
                                         np.arange(n_chunks_in_each_dim[3]))

    if grid_parms['chan_mode'] == 'continuum':
        n_chan_chunks_img = 1
        n_other_chunks = n_chunks_in_each_dim[0] * n_chunks_in_each_dim[1] * n_chunks_in_each_dim[2] * n_chunks_in_each_dim[3]
    elif grid_parms['chan_mode'] == 'cube':
        n_chan_chunks_img = n_chunks_in_each_dim[2]
        n_other_chunks = n_chunks_in_each_dim[0] * n_chunks_in_each_dim[1] * n_chunks_in_each_dim[3]

    #n_delayed = np.prod(n_chunks_in_each_dim)
    chunk_sizes = vis_dataset[sel_parms["imaging_weight"]].chunks

    list_of_grids = ndim_list((n_chan_chunks_img, n_other_chunks))
    list_of_sum_weights = ndim_list((n_chan_chunks_img, n_other_chunks))

    #print(cf_dataset)
    grid_parms['complex_grid'] = True

    # Build graph
    for c_time, c_baseline, c_chan, c_pol in iter_chunks_indx:
        if grid_parms['grid_weights']:
            sub_grid_and_sum_weights = dask.delayed(_aperture_weight_grid_numpy_wrap)(
                vis_dataset[sel_parms["uvw"]].data.partitions[c_time, c_baseline, 0],
                vis_dataset[sel_parms["imaging_weight"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
                vis_dataset["field_id"].data.partitions[c_time],
                gcf_dataset["CF_BASELINE_MAP"].data.partitions[c_baseline],
                gcf_dataset["CF_CHAN_MAP"].data.partitions[c_chan],
                gcf_dataset["CF_POL_MAP"].data.partitions[c_pol],
                gcf_dataset["WEIGHT_CONV_KERNEL"].data,
                gcf_dataset["SUPPORT"].data,
                gcf_dataset["PHASE_GRADIENT"].data,
                freq_chan.partitions[c_chan],
                dask.delayed(grid_parms))
            grid_dtype = np.complex128
        else:
            sub_grid_and_sum_weights = dask.delayed(_aperture_grid_numpy_wrap)(
                vis_dataset[sel_parms["data"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
                vis_dataset[sel_parms["uvw"]].data.partitions[c_time, c_baseline, 0],
                vis_dataset[sel_parms["imaging_weight"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
                vis_dataset["field_id"].data.partitions[c_time],
                gcf_dataset["CF_BASELINE_MAP"].data.partitions[c_baseline],
                gcf_dataset["CF_CHAN_MAP"].data.partitions[c_chan],
                gcf_dataset["CF_POL_MAP"].data.partitions[c_pol],
                gcf_dataset["CONV_KERNEL"].data,
                gcf_dataset["SUPPORT"].data,
                gcf_dataset["PHASE_GRADIENT"].data,
                freq_chan.partitions[c_chan],
                dask.delayed(grid_parms))
            grid_dtype = np.complex128

        if grid_parms['chan_mode'] == 'continuum':
            c_time_baseline_chan_pol = (c_pol + c_chan * n_chunks_in_each_dim[3]
                                        + c_baseline * n_chunks_in_each_dim[3] * n_chunks_in_each_dim[2]
                                        + c_time * n_chunks_in_each_dim[3] * n_chunks_in_each_dim[2] * n_chunks_in_each_dim[1])
            list_of_grids[0][c_time_baseline_chan_pol] = da.from_delayed(
                sub_grid_and_sum_weights[0],
                (1, chunk_sizes[3][c_pol],
                 grid_parms['image_size_padded'][0],
                 grid_parms['image_size_padded'][1]),
                dtype=grid_dtype)
            list_of_sum_weights[0][c_time_baseline_chan_pol] = da.from_delayed(
                sub_grid_and_sum_weights[1],
                (1, chunk_sizes[3][c_pol]),
                dtype=np.double)
        elif grid_parms['chan_mode'] == 'cube':
            c_time_baseline_pol = (c_pol + c_baseline * n_chunks_in_each_dim[3]
                                   + c_time * n_chunks_in_each_dim[1] * n_chunks_in_each_dim[3])
            list_of_grids[c_chan][c_time_baseline_pol] = da.from_delayed(
                sub_grid_and_sum_weights[0],
                (chunk_sizes[2][c_chan], chunk_sizes[3][c_pol],
                 grid_parms['image_size_padded'][0],
                 grid_parms['image_size_padded'][1]),
                dtype=grid_dtype)
            list_of_sum_weights[c_chan][c_time_baseline_pol] = da.from_delayed(
                sub_grid_and_sum_weights[1],
                (chunk_sizes[2][c_chan], chunk_sizes[3][c_pol]),
                dtype=np.double)

    # Sum grids
    for c_chan in range(n_chan_chunks_img):
        list_of_grids[c_chan] = _tree_sum_list(list_of_grids[c_chan])
        list_of_sum_weights[c_chan] = _tree_sum_list(list_of_sum_weights[c_chan])

    # Concatenate Cube
    if grid_parms['chan_mode'] == 'cube':
        list_of_grids_and_sum_weights = [
            da.concatenate(list_of_grids, axis=1)[0],
            da.concatenate(list_of_sum_weights, axis=1)[0]
        ]
    else:
        list_of_grids_and_sum_weights = [
            list_of_grids[0][0], list_of_sum_weights[0][0]
        ]

    # Put axes in image orientation. How much does this add to compute?
    list_of_grids_and_sum_weights[0] = da.moveaxis(
        list_of_grids_and_sum_weights[0], [0, 1], [-2, -1])
    list_of_grids_and_sum_weights[1] = da.moveaxis(
        list_of_grids_and_sum_weights[1], [0, 1], [-2, -1])

    return list_of_grids_and_sum_weights