def test_operations_two_steps(self):
    ## get the request dataset to use as the basis for the percentiles
    uri = self.test_data.get_uri('cancm4_tas')
    variable = 'tas'
    rd = RequestDataset(uri=uri, variable=variable)
    ## this is the underlying OCGIS dataset object
    nc_basis = rd.get()

    ## NOTE: if you want to subset the basis by time, this step is necessary
    # nc_basis = nc_basis.get_between('temporal', datetime.datetime(2001, 1, 1),
    #                                 datetime.datetime(2003, 12, 31, 23, 59))

    ## these are the values to use when calculating the percentile basis. it
    ## may be good to wrap this in a function to have memory freed after the
    ## percentile structure array is computed (a sketch follows this test).
    all_values = nc_basis.variables[variable].value
    ## these are the datetime objects used for window creation
    temporal = nc_basis.temporal.value_datetime
    ## additional parameters for calculating the basis
    percentile = 10
    width = 5
    ## get the structure array
    from ocgis.calc.library.index.dynamic_kernel_percentile import DynamicDailyKernelPercentileThreshold
    daily_percentile = DynamicDailyKernelPercentileThreshold.get_daily_percentile(all_values, temporal,
                                                                                  percentile, width)

    ## perform the calculation using the precomputed basis. in this case,
    ## the basis and target datasets are the same, so the RequestDataset is
    ## reused.
    calc_grouping = ['month', 'year']
    kwds = {'percentile': percentile, 'width': width, 'operation': 'lt', 'daily_percentile': daily_percentile}
    calc = [{'func': 'dynamic_kernel_percentile_threshold', 'name': 'tg10p', 'kwds': kwds}]
    ops = OcgOperations(dataset=rd, calc_grouping=calc_grouping, calc=calc, output_format='nc')
    ret = ops.execute()

    ## if we want to return the values as a three-dimensional numpy array, the
    ## method below will do this. note the interface arrangement for the next
    ## release will alter this slightly.
    ops = OcgOperations(dataset=rd, calc_grouping=calc_grouping, calc=calc, output_format='numpy')
    arrs = ops.execute()

    ## reference the returned numpy data. the first key is the geometry identifier:
    ## 1 in this case as this is the default for no selection geometry. the second
    ## key is the request dataset alias and the third is the calculation name.
    ## the variable name is appended to the end of the calculation to maintain
    ## a unique identifier.
    tg10p = arrs[1]['tas'].variables['tg10p'].value
    ## if we want the date information for the temporal groups' date attributes
    date_parts = arrs[1]['tas'].temporal.date_parts
    assert date_parts.shape[0] == tg10p.shape[1]
    ## these are the representative datetime objects
    rep_dt = arrs[1]['tas'].temporal.value_datetime
    ## and these are the lower and upper time bounds on the date groups
    bin_bounds = arrs[1]['tas'].temporal.bounds_datetime

    ## confirm we have values for each month and year (12*10)
    ret_ds = nc.Dataset(ret)
    try:
        self.assertEqual(ret_ds.variables['tg10p'].shape, (120, 64, 128))
    finally:
        ret_ds.close()
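# As the comment in the test suggests, the basis computation can be wrapped in
# a function so the large source array is freed once the percentile structure
# array exists. A minimal sketch, assuming the same objects as the test; the
# helper name '_get_daily_percentile_basis_' is hypothetical, not part of the
# OCGIS API.
def _get_daily_percentile_basis_(request_dataset, variable, percentile, width):
    from ocgis.calc.library.index.dynamic_kernel_percentile import DynamicDailyKernelPercentileThreshold

    field = request_dataset.get()
    # these large arrays only live inside this frame and may be collected once
    # the function returns
    all_values = field.variables[variable].value
    temporal = field.temporal.value_datetime
    return DynamicDailyKernelPercentileThreshold.get_daily_percentile(all_values, temporal, percentile, width)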
def __init__(self, uri=None, variable=None, interface_overload=None, request_dataset=None):
    self.request_dataset = request_dataset
    if self.request_dataset is None:
        self.uri = uri
        self.variable = variable
        self.alias = None
        self.did = None
        if self.variable is None:
            # open the dataset only long enough to harvest its metadata. open
            # before the try block so a failed open does not leave "rootgrp"
            # unbound in the finally clause.
            self.ds = None
            rootgrp = nc.Dataset(uri)
            try:
                self.meta = NcMetadata(rootgrp)
            finally:
                rootgrp.close()
        else:
            from ocgis.api.request.base import RequestDataset
            kwds = {'uri': uri, 'variable': variable}
            # "interface_overload" defaults to None rather than a mutable {}
            kwds.update(interface_overload or {})
            rd = RequestDataset(**kwds)
            self.ds = rd.get()
            self.meta = self.ds.meta
    else:
        self.uri = self.request_dataset.uri
        self.variable = self.request_dataset.variable
        self.ds = self.request_dataset.get()
        self.meta = self.ds.meta
        self.alias = self.request_dataset.alias
        self.did = self.request_dataset.did
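# The two construction paths above may be exercised as follows (a sketch; the
# path is a placeholder):
#
#   ip = Inspect('/my/local/dataset.nc', variable='tas')
#
#   from ocgis.api.request.base import RequestDataset
#   rd = RequestDataset(uri='/my/local/dataset.nc', variable='tas')
#   ip = Inspect(request_dataset=rd)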
def test_get_field_nonequivalent_units_in_source_data(self):
    new_path = self.test_data.copy_file('cancm4_tas', self.current_dir_output)

    # put non-equivalent units on the source data and attempt to conform
    with nc_scope(new_path, 'a') as ds:
        ds.variables['tas'].units = 'coulomb'
    rd = RequestDataset(uri=new_path, variable='tas', conform_units_to='celsius')
    with self.assertRaises(RequestValidationError):
        rd.get()

    # remove units altogether
    with nc_scope(new_path, 'a') as ds:
        ds.variables['tas'].delncattr('units')
    rd = RequestDataset(uri=new_path, variable='tas', conform_units_to='celsius')
    with self.assertRaises(NoUnitsError):
        rd.get()
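# For contrast, a sketch of the passing pathway: 'cancm4_tas' stores air
# temperature in kelvin, which is dimensionally equivalent to celsius, so
# conforming should validate. The assertion on the conformed units attribute
# is an assumption about this version's behavior, not a documented guarantee.
def test_get_field_equivalent_units_in_source_data(self):
    rd = RequestDataset(uri=self.test_data.get_uri('cancm4_tas'), variable='tas',
                        conform_units_to='celsius')
    # no RequestValidationError is expected for equivalent units
    field = rd.get()
    self.assertEqual(field.variables['tas'].units, 'celsius')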
def test_dev_get_split_shapefile(self):
    raise SkipTest('development only')
    self.set_debug()

    shp_path = '/home/benkoziol/l/data/nfie/linked_catchment_shapefiles/linked_13-RioGrande.shp'
    rd = RequestDataset(uri=shp_path)
    field = rd.get()

    self.log.debug('loading from file')
    field.geom.value

    node_count = map(get_node_count, field.geom.value)
    select = np.array(node_count) > 10000
    to_split = field['GRIDCODE'][select]
    for gc in to_split.value.flat:
        self.log.debug('target gridcode: {}'.format(gc))
        idx = np.where(field['GRIDCODE'].value == gc)[0][0]
        target_geom = field.geom.value[idx]
        split_geom = get_split_polygon_by_node_threshold(target_geom, 10000)
        # write_fiona(split_geom, gc)
        self.assertAlmostEqual(split_geom.area, target_geom.area)
        field.geom.value[idx] = split_geom
        self.assertAlmostEqual(field.geom.value[idx].area, target_geom.area)

    self.log.debug(field.geom.geom_type)
    # field.geom[select].parent.write('/tmp/rio-grande-assembled.shp', driver=DriverVector)
    # write_fiona(field.geom.value, 'rio-grande-assembled')
    self.log.debug('writing shapefile')
    field.write('/tmp/rio-grande-assembled.shp', driver=DriverVector)
def test_name(self):
    path = ShpCabinet().get_shp_path('state_boundaries')
    rd = RequestDataset(uri=path, driver='vector')
    self.assertIsNotNone(rd.name)

    rd = RequestDataset(uri=path, driver='vector', name='states')
    self.assertEqual(rd.name, 'states')
    field = rd.get()
    self.assertEqual(field.name, 'states')
def test_init_driver(self):
    uri = ShpCabinet().get_shp_path('state_boundaries')
    rd = RequestDataset(uri=uri, driver='vector')
    self.assertIsNotNone(rd.variable)
    self.assertIsInstance(rd.get(), Field)

    uri_nc = self.test_data.get_uri('cancm4_tas')
    rd = RequestDataset(uri_nc)
    self.assertIsInstance(rd.driver, DriverNetcdf)

    rd = RequestDataset(uri_nc, driver='vector')
    with self.assertRaises(ValueError):
        assert rd.variable
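# In short: when no driver is passed, one appears to be inferred from the uri
# (netCDF above), while an explicitly mismatched driver only fails once
# variable discovery runs. A sketch with placeholder paths:
#
#   RequestDataset('/path/to/data.nc')                      # DriverNetcdf inferred
#   RequestDataset('/path/to/states.shp', driver='vector')  # explicit vector driver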
def test_with_overloads_real_data(self):
    # copy the test file as the calendar attribute will be modified
    rd = self.test_data.get_rd('cancm4_tas')
    filename = os.path.split(rd.uri)[1]
    dest = os.path.join(self.current_dir_output, filename)
    shutil.copy2(rd.uri, dest)
    # modify the calendar attribute
    with nc_scope(dest, 'a') as ds:
        self.assertEqual(ds.variables['time'].calendar, '365_day')
        ds.variables['time'].calendar = '365_days'
    # assert the calendar is in fact changed on the source file
    with nc_scope(dest, 'r') as ds:
        self.assertEqual(ds.variables['time'].calendar, '365_days')

    rd2 = RequestDataset(uri=dest, variable='tas')
    field = rd2.get()
    # the bad calendar will raise a value error when the datetimes are converted.
    with self.assertRaises(ValueError):
        field.temporal.value_datetime

    # overload the calendar and confirm the datetime values are the same as the datetime values from the
    # original good file
    rd3 = RequestDataset(uri=dest, variable='tas', t_calendar='365_day')
    field = rd3.get()
    self.assertNumpyAll(field.temporal.value_datetime, rd.get().temporal.value_datetime)

    # pass as a dataset collection to operations and confirm the data may be written to a flat file. dates
    # are converted in the process.
    time_range = (datetime.datetime(2001, 1, 1, 0, 0), datetime.datetime(2011, 1, 1, 0, 0))
    dataset = [{'time_region': None,
                'uri': dest,
                'time_range': time_range,
                'alias': u'tas',
                't_units': u'days since 1850-1-1',
                'variable': u'tas',
                't_calendar': u'365_day'}]
    rdc = RequestDatasetCollection(dataset)
    ops = OcgOperations(dataset=rdc, geom='state_boundaries', select_ugid=[25],
                        output_format=constants.OUTPUT_FORMAT_SHAPEFILE)
    ops.execute()
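# The overload pattern generalizes to the other temporal metadata keys used in
# the dataset dictionary above. A sketch with a placeholder path; passing
# 't_units' directly to RequestDataset is assumed to mirror the dictionary form:
#
#   rd = RequestDataset(uri='/path/to/data.nc', variable='tas',
#                       t_calendar='365_day', t_units='days since 1850-1-1')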
def test_keyword_time_subset_func(self):
    def _func_(value, bounds=None):
        indices = []
        for ii, v in enumerate(value.flat):
            if v.month == 6:
                indices.append(ii)
        return indices

    rd = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=rd, time_subset_func=_func_, geom='state_boundaries', geom_select_uid=[20])
    ret = ops.execute()
    for v in ret[20]['tas'].temporal.value_datetime:
        self.assertEqual(v.month, 6)

    rd = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=rd, time_subset_func=_func_, geom='state_boundaries', geom_select_uid=[20],
                        output_format=constants.OUTPUT_FORMAT_NETCDF)
    ret = ops.execute()
    rd_out = RequestDataset(ret)
    for v in rd_out.get().temporal.value_datetime:
        self.assertEqual(v.month, 6)
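# The subset callable simply returns the integer indices to retain, so other
# temporal selections follow the same (value, bounds) signature. A sketch
# selecting the June-July-August season:
def _select_jja_(value, bounds=None):
    return [ii for ii, v in enumerate(value.flat) if v.month in (6, 7, 8)]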
def test_init_combinations(self):
    rd_orig = self.test_data.get_rd('cancm4_tas')
    dest_uri = os.path.join(self.current_dir_output, os.path.split(rd_orig.uri)[1])
    shutil.copy2(rd_orig.uri, dest_uri)
    with nc_scope(dest_uri, 'a') as ds:
        var = ds.variables['tas']
        outvar = ds.createVariable(var._name + 'max', var.dtype, var.dimensions)
        outvar[:] = var[:] + 3
        outvar.setncatts(var.__dict__)
    with nc_scope(dest_uri) as ds:
        self.assertTrue(set(['tas', 'tasmax']).issubset(set(ds.variables.keys())))

    keywords = dict(
        name=[None, 'foo'],
        uri=[None, dest_uri],
        variable=[None, 'tas', ['tas', 'tasmax'], 'crap'],
        alias=[None, 'tas', ['tas', 'tasmax'], ['tas_alias', 'tasmax_alias']],
        units=[None, [None, None], ['celsius', 'fahrenheit'], 'crap', [None, 'kelvin'], ['crap', 'crappy']],
        conform_units_to=[None, [None, None], ['celsius', 'fahrenheit'], 'crap', [None, 'kelvin'],
                          ['crap', 'crappy'], [None, 'coulomb'], ['coulomb', 'coulomb']])

    def itr_row(key, sequence):
        for element in sequence:
            yield ({key: element})

    def itr_products_keywords(keywords):
        iterators = [itr_row(ki, vi) for ki, vi in keywords.iteritems()]
        for dictionaries in itertools.product(*iterators):
            yld = {}
            for dictionary in dictionaries:
                yld.update(dictionary)
            yield yld

    for k in itr_products_keywords(keywords):
        try:
            rd = RequestDataset(**k)
            self.assertEqual(rd._source_metadata, None)
            self.assertEqual(len(get_tuple(rd.variable)), len(get_tuple(rd.units)))
            if k['name'] is None:
                self.assertEqual(rd.name, '_'.join(get_tuple(rd.alias)))
            else:
                self.assertEqual(rd.name, 'foo')
            for v in rd._variable:
                try:
                    self.assertTrue(v in rd.source_metadata['variables'].keys())
                except VariableNotFoundError:
                    if 'crap' in rd._variable:
                        self.assertEqual(rd._source_metadata, None)
                        break
            if k['units'] is None and len(rd._variable) == 1:
                self.assertEqual(rd.units, None)
                self.assertEqual(rd._units, None)
            try:
                field = rd.get()
                self.assertEqual(field.name, rd.name)
                self.assertEqual(set(field.variables.keys()), set(get_tuple(rd.alias)))
            except VariableNotFoundError:
                if 'crap' in rd._variable:
                    continue
                else:
                    raise
            except RequestValidationError:
                if 'coulomb' in get_tuple(k['conform_units_to']):
                    continue
                else:
                    raise
        except RequestValidationError as e:
            # uris cannot be None
            if k['uri'] is None:
                pass
            # variables cannot be None
            elif k['variable'] is None:
                pass
            # 'crap' is not a real variable name
            elif k['conform_units_to'] is not None and (k['conform_units_to'] == 'crap' or
                                                        'crap' in k['conform_units_to']):
                pass
            # conform_units_to must match units element-wise
            elif k['conform_units_to'] is not None and k['variable'] is not None and \
                    len(k['conform_units_to']) != len(k['variable']):
                pass
            # aliases must occur for each variable
            elif len(get_tuple(k['alias'])) != len(get_tuple(k['variable'])):
                pass
            # units must occur for each variable
            elif len(get_tuple(k['units'])) != len(get_tuple(k['variable'])):
                pass
            # bad unit definition: 'crap' is not a real unit
            elif k['units'] is not None and (k['units'] == 'crap' or
                                             'crap' in k['units']):
                pass
            # always need a uri and variable
            elif k['uri'] is None:
                pass
            else:
                raise
        except:
            raise
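# For reference, the Cartesian-product helper above expands a keyword mapping
# into one dictionary per combination. An illustrative call (Python 2
# iteritems, as in the test; dict key order within each result may vary):
#
#   >>> list(itr_products_keywords({'a': [1, 2], 'b': ['x']}))
#   [{'a': 1, 'b': 'x'}, {'a': 2, 'b': 'x'}]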
def test_init_variable_not_found(self):
    rd = self.test_data.get_rd('cancm4_tas')
    rd_bad = RequestDataset(uri=rd.uri, variable='crap')
    with self.assertRaises(VariableNotFoundError):
        rd_bad.get()
class Inspect(object):
    """
    Inspect a local or remote dataset returning a printout similar to `ncdump`_.

    >>> from ocgis import Inspect
    ...
    >>> # Just do a dataset attribute dump.
    >>> ip = Inspect('/my/local/dataset')
    >>> print(ip)
    ...
    >>> # Get variable-specific info.
    >>> ip = Inspect('/my/local/dataset', variable='tas')
    >>> print(ip)

    :param uri: Absolute path to the data's location.
    :type uri: str
    :param variable: Specific variable to inspect.
    :type variable: str
    :param request_dataset: Use this request dataset in place of one created internally from "uri" and "variable".
    :type request_dataset: :class:`ocgis.RequestDataset`
    :param meta: Use this metadata object in place of the one created internally.
    :type meta: :class:`ocgis.NcMetadata`

    .. _ncdump: http://www.unidata.ucar.edu/software/netcdf/docs/netcdf/ncdump.html
    """

    def __init__(self, uri=None, variable=None, request_dataset=None, meta=None):
        if meta is None and uri is None and request_dataset is None:
            raise ValueError('At least one of "uri", "request_dataset", or "meta" must be provided.')
        self.uri = uri
        self.variable = variable
        self.meta = meta
        self.request_dataset = request_dataset
        if self.request_dataset is None and self.meta is None:
            if self.uri is not None and self.variable is not None:
                from ocgis.api.request.base import RequestDataset
                self.request_dataset = RequestDataset(uri=self.uri, variable=self.variable)
        self.alias = None
        self.did = None
        self.ds = None
        if self.request_dataset is None:
            if self.meta is None:
                rootgrp = nc.Dataset(self.uri)
                try:
                    self.meta = NcMetadata(rootgrp)
                finally:
                    rootgrp.close()
        else:
            self.uri = self.request_dataset.uri
            self.variable = self.request_dataset.variable
            self.ds = self.request_dataset.get()
            self.meta = self.ds.meta
            self.alias = self.request_dataset.alias
            self.did = self.request_dataset.did

    def __repr__(self):
        msg = ''
        if self.request_dataset is None:
            lines = self.get_report_no_variable()
        else:
            lines = self.get_report()
        for line in lines:
            msg += line + '\n'
        return msg

    @property
    def _t(self):
        return self.ds.temporal

    @property
    def _s(self):
        return self.ds.spatial

    @property
    def _l(self):
        return self.ds.level

    def get_temporal_report(self):
        try:
            if self._t.format_time:
                res = int(self._t.resolution)
                try:
                    start_date, end_date = self._t.extent_datetime
                # the times may not be formattable
                except ValueError as e:
                    if e.message == 'year is out of range':
                        start_date, end_date = self._t.extent
                    else:
                        ocgis_lh(exc=e, logger='inspect')
            else:
                res = 'NA (non-formatted times requested)'
                start_date, end_date = self._t.extent
        # raised if the temporal dimension has a single value. possible with a
        # snippet or a small dataset...
        except ResolutionError:
            res, start_date, end_date = ['NA (singleton)'] * 3

        n = len(self._t.value)

        # if the calendar attribute is not set, the feature should not be masked
        calendar = self.meta['variables'][self._t.name]['attrs'].get('calendar')
        if calendar is None:
            calendar = 'None (will assume "standard")'
        units = self._t.units

        lines = []
        lines.append('       Start Date = {0}'.format(start_date))
        lines.append('         End Date = {0}'.format(end_date))
        lines.append('         Calendar = {0}'.format(calendar))
        lines.append('            Units = {0}'.format(units))
        lines.append('Resolution (Days) = {0}'.format(res))
        lines.append('            Count = {0}'.format(n))

        ## append information on bounds
        if self._t.bounds is not None:
            has_bounds = True
        else:
            has_bounds = False
        lines.append('       Has Bounds = {0}'.format(has_bounds))

        return lines

    def get_spatial_report(self):
        res = self._s.grid.resolution
        extent = self._s.grid.extent
        itype = self._s.geom.get_highest_order_abstraction().__class__.__name__
        projection = self.ds.spatial.crs

        lines = []
        lines.append('Spatial Reference = {0}'.format(projection.__class__.__name__))
        lines.append('     Proj4 String = {0}'.format(projection.sr.ExportToProj4()))
        lines.append('           Extent = {0}'.format(extent))
        lines.append('   Interface Type = {0}'.format(itype))
        lines.append('       Resolution = {0}'.format(res))
        lines.append('            Count = {0}'.format(self._s.grid.uid.reshape(-1).shape[0]))

        return lines

    def get_level_report(self):
        if self._l is None:
            lines = ['No level dimension found.']
        else:
            lines = []
            lines.append('Level Variable = {0}'.format(self._l.name))
            lines.append('         Count = {0}'.format(self._l.value.shape[0]))

            ## append information on bounds
            if self._l.bounds is not None:
                has_bounds = True
            else:
                has_bounds = False
            lines.append('    Has Bounds = {0}'.format(has_bounds))

        return lines

    def get_dump_report(self):
        return self.meta._get_lines_()

    def get_report_no_variable(self):
        lines = ['', 'URI = {0}'.format(self.uri)]
        lines.append('VARIABLE = {0}'.format(self.variable))
        lines.append('')
        lines += self.get_dump_report()
        return lines

    def get_report(self):
        ## a variable target is required for this method
        if self.variable is None:
            raise AttributeError('A "variable" target is required.')

        mp = [
            {'=== Temporal =============': self.get_temporal_report},
            {'=== Spatial ==============': self.get_spatial_report},
            {'=== Level ================': self.get_level_report},
            {'=== Dump =================': self.get_dump_report}
        ]

        lines = ['', 'URI = {0}'.format(self.uri)]
        lines.append('VARIABLE = {0}'.format(self.variable))
        lines.append('ALIAS = {0}'.format(self.alias))
        lines.append('DID = {0}'.format(self.did))
        lines.append('')
        for dct in mp:
            for key, value in dct.iteritems():
                lines.append(key)
                lines.append('')
                for line in value():
                    lines.append(line)
                lines.append('')

        return lines

    def _as_dct_(self):
        ret = self.meta.copy()
        ## without a target variable, attempt to set start and end dates.
        if self.variable is None:
            ds = nc.Dataset(self.uri, 'r')
            try:
                time = ds.variables['time']
                time_bounds = [time[0], time[-1]]
                time_bounds = nc.num2date(time_bounds, time.units, calendar=time.calendar)
                derived = {'Start Date': str(time_bounds[0]), 'End Date': str(time_bounds[1])}
            except:
                warn('Time variable not found or improperly attributed. Setting "derived" key to None.')
                derived = None
            finally:
                ds.close()
        ## we can get derived values
        else:
            derived = OrderedDict()
            to_add = self.get_temporal_report() + self.get_spatial_report() + self.get_level_report()
            for row in to_add:
                try:
                    key, value = re.split(' = ', row, maxsplit=1)
                ## here to catch oddities of the returns
                except ValueError:
                    if row == 'No level dimension found.':
                        continue
                    else:
                        raise
                key = key.strip()
                derived.update({key: value})
        ret.update({'derived': derived})
        return ret