def write_cache_file(entry, input, context):
    """Write every chunk of *input* into a fresh local cache file for *entry*.

    Temporal chunks are streamed straight to the output file; chunks along a
    non-time axis are gathered and concatenated once before a single write.
    On success the cache entry's size is recorded and a metric incremented.

    Args:
        entry: Cache entry; gets ``local_path`` set and its size persisted.
        input: Input descriptor providing ``chunks_ingress`` and ``chunk_axis``.
            (NOTE(review): parameter shadows the ``input`` builtin — kept for
            interface compatibility.)
        context: Execution context used to open the output file.
    """
    gathered = []
    split_axis = None
    split_axis_index = None

    entry.local_path = entry.new_output_path()

    with context.new_output(entry.local_path) as outfile:
        for _, _, chunk in input.chunks_ingress(context):
            # Resolve the axis we are chunked over from the first chunk only.
            if split_axis is None:
                split_axis_index = chunk.getAxisIndex(input.chunk_axis)
                split_axis = chunk.getAxis(split_axis_index)

            if split_axis.isTime():
                # Time-chunked data can be appended to the file as it arrives.
                outfile.write(chunk, id=input.variable.var_name)
            else:
                # Spatial chunks must be joined before a single write.
                gathered.append(chunk)

        if split_axis is not None and not split_axis.isTime():
            combined = MV2.concatenate(gathered, axis=split_axis_index)

            outfile.write(combined, id=input.variable.var_name)

    try:
        size = entry.set_size()
    except db.IntegrityError:
        # Handle case where cache files are written at same time
        pass
    else:
        metrics.WPS_DATA_CACHE_WRITE.inc(size)
def testSubRegion(self):
    """Exercise ``subRegion`` selection: positional, squeeze, keyword and wraparound."""
    # positional
    s2 = self.var.subRegion((366., 731., 'ccn'),
                            (-42., 42., 'ccn'),
                            (90., 270., 'con'))
    self.assertTrue(numpy.ma.allequal(self.vp, s2))

    # squeeze
    s2 = self.var.subRegion((731., 731., 'ccn'),
                            (-42., 42., 'ccn'),
                            (90., 270., 'con'),
                            squeeze=1)
    self.assertEqual(len(s2.shape), 2)

    # keyword
    s2 = self.var.subRegion(latitude=(-42., 42., 'ccn'),
                            longitude=(90., 270., 'con'),
                            time=(366., 731., 'ccn'))
    self.assertTrue(numpy.ma.allequal(self.vp, s2))

    # Wraparound: a selection crossing the dateline must equal the manual
    # rotation of the longitude axis.
    u = self.file['u']
    wrapped = MV.concatenate((u[:, :, 8:], u[:, :, :8]), axis=2)
    su = u.subRegion(lon=(90, 450, 'co'))
    self.assertTrue(numpy.ma.allequal(wrapped, su))
# NOTE(review): this call appears to be the failure branch of a conditional
# cut off above this fragment — confirm against the full file.
markError('subRegion squeeze option failed')

# mf 20010308 subRegion - extended wrap
fw = cdms2.open(os.path.join(pth, 'ps.wrap.test.0E.nc'))
ps = fw.getVariable('ps')

# Manually rotate the longitude axis and compare against an extended-wrap read.
s2 = numpy.ma.concatenate((ps[:, :, 36:], ps[:, :, :37]), axis=2)
s2w = fw('ps', longitude=(-180, 180, 'ccn'))
if not numpy.ma.allequal(s2, s2w):
    markError('subRegion extended wrap')

varlist = fw.getVariables(spatial=1)

# Second wraparound check on 'u' from the other file handle.
u = f['u']
ucat = MV.concatenate((u[:, :, 8:], u[:, :, :8]), axis=2)
su = u.subRegion(lon=(90, 450, 'co'))
if not numpy.ma.allequal(ucat, su):
    markError('subRegion wrap, test 2')

# negative strides
fc = cdms2.Cdunif.CdunifFile(os.path.join(pth, 'ps.wrap.test.0E.nc'))
psc = fc.variables['ps']
psb = psc[:]
s3c = psb[0, ::-1]
s4c = psb[0, ::-2]

s3 = fw('ps', latitude=(90, -90))
if not numpy.ma.allequal(s3, s3c):
    markError('Reverse interval failed')

s4 = ps.getSlice(':', (None, None, -1))
# s4 = ps.subRegion(latitude=slice(None,None,-1))
if not numpy.ma.allequal(s4, s3c):
    markError('Negative stride failed')
# NOTE(review): this call appears to be the failure branch of a conditional
# cut off above this fragment — confirm against the full file.
markError('subRegion squeeze option failed')

# mf 20010308 subRegion - extended wrap
fw = cdms2.open(os.path.join(get_sample_data_dir(), 'ps.wrap.test.0E.nc'))
ps = fw.getVariable('ps')

# Manually rotate the longitude axis and compare against an extended-wrap read.
s2 = numpy.ma.concatenate((ps[:, :, 36:], ps[:, :, :37]), axis=2)
s2w = fw('ps', longitude=(-180, 180, 'ccn'))
if not numpy.ma.allequal(s2, s2w):
    markError('subRegion extended wrap')

varlist = fw.getVariables(spatial=1)

# Second wraparound check on 'u' from the other file handle.
u = f['u']
ucat = MV.concatenate((u[:, :, 8:], u[:, :, :8]), axis=2)
su = u.subRegion(lon=(90, 450, 'co'))
if not numpy.ma.allequal(ucat, su):
    markError('subRegion wrap, test 2')

# negative strides
fc = cdms2.Cdunif.CdunifFile(os.path.join(get_sample_data_dir(), 'ps.wrap.test.0E.nc'))
psc = fc.variables['ps']
psb = psc[:]
s3c = psb[0, ::-1]
s4c = psb[0, ::-2]

s3 = fw('ps', latitude=(90, -90))
if not numpy.ma.allequal(s3, s3c):
    markError('Reverse interval failed')

s4 = ps.getSlice(':', (None, None, -1))
# s4 = ps.subRegion(latitude=slice(None,None,-1))
if not numpy.ma.allequal(s4, s3c):
    markError('Negative stride failed')
def process(self, operation, num_inputs, output_file, process):
    """Apply *process* to chunked input data and write results to *output_file*.

    Inputs are grouped into dataset collections (one combined collection when
    a single input/group is requested, otherwise one per input), partitioned
    over the requested axis, optionally regridded, reduced with *process*,
    and written out. When reducing over the temporal axis the per-partition
    results are collected and concatenated (and regridded) at the end.

    Args:
        operation: Operation carrying ``inputs``, ``domain`` and the
            ``gridder``/``axes`` parameters.
        num_inputs: Number of inputs fed to *process* per partition.
        output_file: Open CDMS-like file object written to by ``write``.
        process: Callable applied to the partition data (NOTE(review):
            shadows this method's own name — kept for interface
            compatibility).

    Returns:
        The output variable name written to *output_file*.
    """
    grid = None

    gridder = operation.get_parameter('gridder')

    start = datetime.datetime.now()

    axes = operation.get_parameter('axes', True)
    axes = axes.values[0]

    self.log('Starting to process inputs')

    result_list = []

    if len(operation.inputs) == 1 or num_inputs == 1:
        # Single collection holding all inputs.
        collections = [
            file_manager.DataSetCollection.from_variables(operation.inputs)
        ]
    else:
        # One collection per input so partitions yield matched chunks.
        collections = [
            file_manager.DataSetCollection.from_variables([x])
            for x in operation.inputs
        ]

    with file_manager.FileManager(collections) as fm:
        var_name = fm.get_variable_name()

        # Fixed: pass the collections directly instead of copying them
        # through a redundant list comprehension; also removed the unused
        # ``output_list`` local.
        with contextlib.nested(*fm.collections):
            over_temporal = fm.collections[0].datasets[0].get_time().id == axes

            for meta in fm.partitions(operation.domain, axes, num_inputs):
                data_list = []
                axis_index = None

                for item in meta:
                    ds, chunk = item

                    if axis_index is None:
                        axis_index = ds.get_variable().getAxisIndex(axes)

                    if gridder is not None:
                        if grid is None:
                            # Build the target grid once, from the first chunk.
                            grid = self.generate_grid(gridder, ds.spatial, chunk)

                        # Temporal reductions are regridded once at the end
                        # instead of per-chunk.
                        if not over_temporal:
                            chunk = chunk.regrid(
                                grid,
                                regridTool=gridder.tool,
                                regridMethod=gridder.method)

                    data_list.append(chunk)

                if len(data_list) == 0:
                    break

                # Multi-input processes reduce across inputs; single-input
                # processes reduce along the chosen axis.
                if len(data_list) > 1:
                    result_data = process(*data_list)
                else:
                    result_data = process(*data_list, axis=axis_index)

                self.log('Process output shape {}'.format(result_data.shape))

                if over_temporal:
                    result_list.append(result_data)
                else:
                    output_file.write(result_data, id=var_name)

            if over_temporal:
                data = MV.concatenate(result_list)

                if grid is not None:
                    data = data.regrid(
                        grid,
                        regridTool=gridder.tool,
                        regridMethod=gridder.method)

                output_file.write(data, id=var_name)

    stop = datetime.datetime.now()

    final_shape = output_file[var_name].shape

    self.log('Finish retrieving all files, final shape "{}", elapsed time {}',
             final_shape, stop - start, percent=100)

    return var_name
def concat(self, contexts):
    """ Concatenate data chunks.

    Merges the ingress *contexts*, then writes every input's chunks to a
    single public output file. Temporal chunks are written as they arrive
    (with optional unit conversion and regridding); chunks over any other
    axis are gathered and concatenated before one write.

    Args:
        contexts: Iterable of OperationContext instances to merge.

    Returns:
        Updated (merged) OperationContext with ``output_path`` set.
    """
    context = OperationContext.merge_ingress(contexts)

    context.output_path = context.gen_public_path()

    nbytes = 0

    start = datetime.datetime.now()

    with context.new_output(context.output_path) as outfile:
        for index, input in enumerate(context.sorted_inputs()):
            data = []
            chunk_axis = None
            chunk_axis_index = None

            # Skip file if not mapped
            if input.mapped is None:
                logger.info('Skipping %r', input.filename)

                continue

            for file_path, _, chunk in input.chunks(input_index=index, context=context):
                logger.info('Chunk shape %r %r', file_path, chunk.shape)

                if chunk_axis is None:
                    chunk_axis_index = chunk.getAxisIndex(input.chunk_axis)
                    chunk_axis = chunk.getAxis(chunk_axis_index)

                # We can write chunks along the temporal axis immediately
                # otherwise we need to collect them to concatenate over
                # an axis
                if chunk_axis.isTime():
                    logger.info('Writing temporal chunk %r', chunk.shape)

                    if context.units is not None:
                        chunk.getTime().toRelativeTime(str(context.units))

                    if context.is_regrid:
                        chunk = regrid_chunk(context, chunk, input.mapped)

                    outfile.write(chunk, id=str(input.variable.var_name))

                    # Count bytes when they are written.
                    nbytes += chunk.nbytes
                else:
                    logger.info('Gathering spatial chunk')

                    data.append(chunk)

            # Concatenate chunks along an axis
            if chunk_axis is not None and not chunk_axis.isTime():
                data = MV2.concatenate(data, axis=chunk_axis_index)

                if context.is_regrid:
                    # BUGFIX: regrid the concatenated data that is written
                    # below; previously the regrid result was assigned to
                    # ``chunk`` and discarded, so unregridded data was
                    # written.
                    data = regrid_chunk(context, data, input.mapped)

                outfile.write(data, id=str(input.variable.var_name))

                # BUGFIX: count the bytes actually written; previously only
                # the last chunk's ``nbytes`` was added here.
                nbytes += data.nbytes

    elapsed = datetime.datetime.now() - start

    self.status('Processed {!r} bytes in {!r} seconds', nbytes, elapsed.total_seconds())

    return context