def test_subset_with_shapefile_no_ugid(self):
    """Test a subset operation using a shapefile without a UGID attribute.

    Runs the subset for both in-memory (numpy) and CSV+shapefile output and checks
    that the user-supplied UID attribute ('ID') is used instead of the default one.
    """
    output_format = [constants.OUTPUT_FORMAT_NUMPY, constants.OUTPUT_FORMAT_CSV_SHAPEFILE]
    geom = self.get_shapefile_path_with_no_ugid()
    geom_select_uid = [8, 11]
    geom_uid = 'ID'
    rd = self.test_data.get_rd('cancm4_tas')
    for of in output_format:
        ops = OcgOperations(dataset=rd, geom=geom, geom_select_uid=geom_select_uid, geom_uid=geom_uid,
                            snippet=True, output_format=of)
        # Two selection geometries were requested.
        self.assertEqual(len(ops.geom), 2)
        ret = ops.execute()
        if of == constants.OUTPUT_FORMAT_NUMPY:
            for element in geom_select_uid:
                self.assertIn(element, ret)
            self.assertEqual(ret.properties[8].dtype.names,
                             ('STATE_FIPS', 'ID', 'STATE_NAME', 'STATE_ABBR'))
        else:
            with open(ret) as f:
                reader = DictReader(f)
                # next(reader) replaces the Python-2-only reader.next().
                row = next(reader)
                self.assertIn(geom_uid, row.keys())
                self.assertNotIn(env.DEFAULT_GEOM_UID, row.keys())
            shp_path = os.path.split(ret)[0]
            shp_path = os.path.join(shp_path, 'shp', '{0}_gid.shp'.format(ops.prefix))
            with fiona.open(shp_path) as source:
                # next(source) replaces the Python-2-only source.next().
                record = next(source)
                self.assertIn(geom_uid, record['properties'])
                self.assertNotIn(env.DEFAULT_GEOM_UID, record['properties'])
def test_get_converter(self):
    """Converter construction through the interpreter: returned type, melted flag, and option pass-through."""
    rd = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=rd)
    outdir = self.current_dir_output
    prefix = 'foo'
    interp = OcgInterpreter(ops)
    so = OperationsEngine(ops)
    # Basic case: the interpreter hands back an instance of the requested converter class.
    ret = interp._get_converter_(NumpyConverter, outdir, prefix, so)
    self.assertIsInstance(ret, NumpyConverter)
    # Test melted is registered by the converter.
    ops = OcgOperations(dataset=rd, melted=True, output_format=constants.OutputFormatName.SHAPEFILE)
    interp = OcgInterpreter(ops)
    ret = interp._get_converter_(ShpConverter, outdir, prefix, so)
    self.assertIsInstance(ret, ShpConverter)
    self.assertTrue(ret.melted)
    # Test options are passed to the underlying converter.
    opts = {'data_model': 'foo'}
    ops = OcgOperations(dataset=rd, output_format='nc', output_format_options=opts)
    interp = OcgInterpreter(ops)
    ret = interp._get_converter_(NcConverter, outdir, prefix, so)
    self.assertDictEqual(ret.options, opts)
def test1d(self):
    """Dissimilarity calculation with a 1x1 reference field against a small candidate grid.

    The output field should contain a single 'dissimilarity' variable with shape
    (realization, level, row, col) == (1, 1, 2, 2).
    """
    p1 = self.write_field_data('v1', ncol=1, nrow=1)
    p3 = self.write_field_data('v1', dir='b')
    ref_range = [dt.datetime(2000, 3, 1), dt.datetime(2000, 3, 31)]
    reference = ocgis.RequestDataset(p1, time_range=ref_range).get()
    cand_range = [dt.datetime(2000, 8, 1), dt.datetime(2000, 8, 31)]
    candidate = ocgis.RequestDataset(p3, time_range=cand_range)
    calc = [{'func': 'dissimilarity',
             'name': 'output_1d',
             'kwds': {'target': reference, 'candidate': ('v1',)}}]
    ops = OcgOperations(dataset=candidate, calc=calc)
    ret = ops.execute()
    actual_field = ret.get_element()
    actual_variables = get_variable_names(actual_field.data_variables)
    # Compare against the plain string: the original ('dissimilarity') parenthesization
    # was not a tuple and read misleadingly.
    self.assertEqual(actual_variables[0], 'dissimilarity')
    dist = actual_field['dissimilarity']
    self.assertEqual(dist.shape, (1, 1, 2, 2))
def __iter__(self):
    """Iterate the Cartesian product of all parameter combinations, yielding results.

    Yields (index, ops) when self.ops_only is set, else (index, ops, result).
    Combinations blocked by check_blocked are skipped; exceptions are routed
    through check_exception, which decides whether to re-raise.
    """
    its = [p().__iter__() for p in self.get_parameters()]
    for ii, values in enumerate(itertools.product(*its)):
        # Skip forward until the requested combination index is reached.
        if self.target_combo is not None:
            if self.target_combo > ii:
                continue
        kwds = {}
        for val in values:
            kwds.update(val)
        if not self.ops_only:
            # Each executed combination writes into its own scratch directory.
            kwds.update({'dir_output': tempfile.mkdtemp()})
        try:
            try:
                ops = OcgOperations(**kwds)
                try:
                    self.check_blocked(ops)
                except BlockedCombination:
                    # This combination is intentionally unsupported; move on.
                    continue
                if self.verbose:
                    print(ii)
                if self.ops_only:
                    yld = (ii, ops)
                else:
                    ret = ops.execute()
                    yld = (ii, ops, ret)
                yield (yld)
            except Exception as e:
                # Delegate to check_exception, which may swallow expected failures.
                tb = traceback.format_exc()
                try:
                    self.check_exception(ii, kwds, e, tb)
                except:
                    raise
        finally:
            # Always clean up the per-combination scratch directory.
            if not self.ops_only and self.remove_output:
                shutil.rmtree(kwds['dir_output'])
def __iter__(self):
    """Iterate parameter combinations, yielding (index, ops[, result]) tuples.

    Duplicate of the formatted combination iterator: skips to target_combo,
    honors ops_only / verbose / remove_output, and funnels exceptions through
    check_exception.
    """
    its = [p().__iter__() for p in self.get_parameters()]
    for ii, values in enumerate(itertools.product(*its)):
        # Fast-forward to the requested combination index, if any.
        if self.target_combo is not None:
            if self.target_combo > ii:
                continue
        kwds = {}
        for val in values:
            kwds.update(val)
        if not self.ops_only:
            # Isolated scratch directory per executed combination.
            kwds.update({'dir_output': tempfile.mkdtemp()})
        try:
            try:
                ops = OcgOperations(**kwds)
                try:
                    self.check_blocked(ops)
                except BlockedCombination:
                    # Combination deliberately unsupported; skip it.
                    continue
                if self.verbose:
                    print(ii)
                if self.ops_only:
                    yld = (ii, ops)
                else:
                    ret = ops.execute()
                    yld = (ii, ops, ret)
                yield (yld)
            except Exception as e:
                # check_exception decides whether this failure is expected.
                tb = traceback.format_exc()
                try:
                    self.check_exception(ii, kwds, e, tb)
                except:
                    raise
        finally:
            # Remove the scratch directory regardless of outcome.
            if not self.ops_only and self.remove_output:
                shutil.rmtree(kwds['dir_output'])
def test(self):
    """Multi-field function with a basis field: two inputs differenced against one basis variable."""
    path1 = self.write_field_data('data1')
    path2 = self.write_field_data('data2')
    path3 = self.write_field_data('basis_var')
    time_range = [datetime(2000, 3, 1), datetime(2000, 3, 31)]
    rds = [RequestDataset(p, time_range=time_range) for p in [path1, path2]]
    mrd = MultiRequestDataset(rds)
    # The basis uses a different (August) time window than the March inputs.
    basis = RequestDataset(path3, time_range=[datetime(2000, 8, 1), datetime(2000, 8, 31)])
    basis_field = basis.get()
    calc = [{'func': 'mfpf',
             'name': 'output_mfpf',
             'kwds': {'reference': ('data1', 'data2'), 'basis': basis_field}}]
    ops = OcgOperations(dataset=mrd, calc=calc)
    ret = ops.execute()
    actual_field = ret.get_element()
    actual_variables = get_variable_names(actual_field.data_variables)
    self.assertEqual(actual_variables, ('diff_data1_basis_var', 'diff_data2_basis_var'))
    # Both difference variables should sum to the same fixture-derived total.
    sums = [v.get_value().sum() for v in actual_field.data_variables]
    for s in sums:
        self.assertAlmostEqual(s, 7.8071042497325145)
def run_op(resource, calc, options):
    """Create an OCGIS operation, launch it and return the results."""
    from os.path import abspath, curdir
    from ocgis import OcgOperations, RequestDataset, env
    import uuid

    LOGGER.info('Start ocgis module call function')

    # Prepare the environment.
    env.OVERWRITE = True
    out_dir = abspath(curdir)
    run_prefix = str(uuid.uuid1())
    env.PREFIX = run_prefix

    # One request dataset per resource entry; the generic 'resource' key
    # carries no explicit variable name.
    datasets = []
    for key, val in resource.items():
        variable = None if key == 'resource' else key
        datasets.append(RequestDataset(val, variable=variable))

    operation = OcgOperations(dataset=datasets,
                              calc=calc,
                              calc_grouping=options['calc_grouping'],
                              dir_output=out_dir,
                              prefix=run_prefix,
                              add_auxiliary_files=False,
                              output_format='nc')
    return operation.execute()
def test_icclim(self):
    """ICCLIM mean-temperature (TG) index with month/year grouping on the synthetic dataset."""
    rd = RequestDataset(**self.get_dataset())
    calc = [{'func': 'icclim_TG', 'name': 'TG'}]
    calc_grouping = ['month', 'year']
    ret = OcgOperations(dataset=rd, calc=calc, calc_grouping=calc_grouping).execute()
    # The synthetic data is constructed so the overall TG mean is exactly 2.5.
    self.assertEqual(ret.get_element(variable_name='TG').get_value().mean(), 2.5)
def test_esmf(self):
    """Regrid a dataset onto an identical destination; output netCDF should match the source file."""
    rd1 = RequestDataset(**self.get_dataset())
    rd2 = deepcopy(rd1)
    ops = OcgOperations(dataset=rd1, regrid_destination=rd2, output_format='nc')
    ret = ops.execute()
    # Attributes/variables expected to differ after regridding (provenance, grid mapping)
    # are excluded from the comparison.
    ignore_attributes = {'time_bnds': ['units', 'calendar'],
                         'global': ['history'],
                         'foo': ['grid_mapping']}
    ignore_variables = ['latitude_longitude']
    self.assertNcEqual(ret, rd1.uri, ignore_attributes=ignore_attributes,
                       ignore_variables=ignore_variables)
def test_esmf(self):
    """Self-regridding should reproduce the source data values to numerical tolerance."""
    rd1 = RequestDataset(**self.get_dataset())
    rd2 = deepcopy(rd1)
    ops = OcgOperations(dataset=rd1, regrid_destination=rd2, output_format='nc')
    ret = ops.execute()
    actual_value = RequestDataset(ret).get().data_variables[0].get_value()
    desired_value = rd1.get().data_variables[0].get_value()
    self.assertNumpyAllClose(actual_value, desired_value)
def test_shapefile_through_operations(self):
    """Round-trip a shapefile through operations; output gains a geometry-identifier header."""
    path = os.path.join(self.path_bin, 'shp', 'state_boundaries', 'state_boundaries.shp')
    rd = RequestDataset(path)
    field = rd.get()
    ops = OcgOperations(dataset=rd, output_format='shp')
    ret = ops.execute()
    rd2 = RequestDataset(ret)
    field2 = rd2.get()
    # The converted field carries all original keys plus the geometry id header.
    self.assertAsSetEqual(list(field.keys()) + [HeaderName.ID_GEOMETRY], list(field2.keys()))
    # One record per geometry in the fixture shapefile.
    self.assertEqual((51,), field2.data_variables[0].shape)
def test_many_request_datasets(self):
    """Test numerous request datasets."""
    rd_base = self.test_data.get_rd('cancm4_tas')
    # Bounding box subset: (minx, miny, maxx, maxy).
    geom = [-74.0, 40.0, -72.0, 42.0]
    # Execute the same subset on 500 independent copies of the request dataset.
    rds = [deepcopy(rd_base) for ii in range(500)]
    for rd in rds:
        ret = OcgOperations(dataset=rd, geom=geom, snippet=True).execute()
        actual = ret.get_element(variable_name='tas').shape
        self.assertEqual(actual, (1, 2, 1))
def test_write(self):
    """CSV-Shapefile conversion: melted output, UGID/GID companion shapefiles, selection aggregation."""
    # test melted format
    for melted in [False, True]:
        kwargs_ops = dict(melted=melted)
        kwargs_conv = dict(outdir=tempfile.mkdtemp(dir=self.current_dir_output))
        conv = self.get(kwargs_ops=kwargs_ops, kwargs_conv=kwargs_conv)
        csv_path = conv.write()
        self.assertTrue(os.path.exists(csv_path))
        # Mapping of selection UID -> {geometry UID -> flat cell indices}.
        self.assertEqual(conv._ugid_gid_store,
                         {1: {18: [5988, 5989, 5990, 6116, 6117, 6118], 15: [5992, 6119, 6120]}})
        shp_path = os.path.split(csv_path)[0]
        shp_path = os.path.join(shp_path, 'shp')
        shp_path_gid = os.path.join(shp_path, 'foo_gid.shp')
        target = RequestDataset(shp_path_gid).get()
        self.assertEqual(target.shape[-1], 9)
        shp_path_ugid = os.path.join(shp_path, 'foo_ugid.shp')
        target = RequestDataset(shp_path_ugid).get()
        self.assertEqual(target.shape[-1], 2)
    # test aggregating the selection geometry
    rd1 = self.test_data.get_rd('cancm4_tasmax_2011')
    rd2 = self.test_data.get_rd('maurer_bccr_1950')
    keywords = dict(agg_selection=[True, False])
    for k in self.iter_product_keywords(keywords):
        ops = OcgOperations(dataset=[rd1, rd2], snippet=True, output_format='csv-shp',
                            geom='state_boundaries', agg_selection=k.agg_selection,
                            select_ugid=[32, 47], prefix=str(k.agg_selection))
        ret = ops.execute()
        directory = os.path.split(ret)[0]
        path_ugid = os.path.join(directory, 'shp', '{0}_ugid.shp'.format(ops.prefix))
        with fiona.open(path_ugid) as source:
            records = list(source)
        # Aggregated selections collapse to a single geometry with UID 1.
        if k.agg_selection:
            uids = [1]
        else:
            uids = [32, 47]
        self.assertEqual([r['properties'][env.DEFAULT_GEOM_UID] for r in records], uids)
        path_gid = os.path.join(directory, 'shp', '{0}_gid.shp'.format(ops.prefix))
        with fiona.open(path_gid) as source:
            uid = [r['properties'][env.DEFAULT_GEOM_UID] for r in source]
        if k.agg_selection:
            self.assertAsSetEqual(uid, [1])
        else:
            # Per-state cell counts from the fixture datasets.
            uid = np.array(uid)
            self.assertEqual(np.sum(uid == 32), 1915)
            self.assertEqual(np.sum(uid == 47), 923)
        meta = os.path.join(os.path.split(ret)[0], '{0}_source_metadata.txt'.format(ops.prefix))
        with open(meta, 'r') as f:
            lines = f.readlines()
        self.assertTrue(len(lines) > 50)
def test_shapefile_through_operations_subset(self):
    """Subset a shapefile with one of its own geometries and write the result back to shapefile."""
    path = os.path.join(self.path_bin, 'shp', 'state_boundaries', 'state_boundaries.shp')
    rd = RequestDataset(path)
    field = rd.get()
    self.assertIsNone(field.spatial.properties)
    # Use the same shapefile as the selection geometry, keeping only UGID 15.
    ops = OcgOperations(dataset=rd, output_format='shp', geom=path, select_ugid=[15])
    ret = ops.execute()
    rd2 = RequestDataset(ret)
    field2 = rd2.get()
    self.assertAsSetEqual(field.variables.keys(), field2.variables.keys())
    # A single selected geometry: all five dimensions collapse to length one.
    self.assertEqual(tuple([1] * 5), field2.shape)
def test_shapefile_through_operations(self):
    """Round-trip a shapefile through a shapefile-output operation (legacy ShpCabinet API)."""
    path = ShpCabinet().get_shp_path('state_boundaries')
    rd = RequestDataset(path)
    field = rd.get()
    self.assertIsNone(field.spatial.properties)
    ops = OcgOperations(dataset=rd, output_format='shp')
    ret = ops.execute()
    rd2 = RequestDataset(ret)
    field2 = rd2.get()
    # Variables and shape should survive the round trip unchanged.
    self.assertAsSetEqual(field.variables.keys(), field2.variables.keys())
    self.assertEqual(field.shape, field2.shape)
def test_system_many_request_datasets(self):
    """Test numerous request datasets."""
    base = self.test_data.get_rd('cancm4_tas')
    # Bounding box subset: (minx, miny, maxx, maxy).
    bbox = [-74.0, 40.0, -72.0, 42.0]
    # Run the identical snippet subset on 500 independent dataset copies.
    requests = [deepcopy(base) for _ in range(500)]
    for request in requests:
        operations = OcgOperations(dataset=request, geom=bbox, snippet=True)
        result = operations.execute()
        shape = result.get_element(variable_name='tas').shape
        self.assertEqual(shape, (1, 2, 1))
def test_shapefile_through_operations(self):
    """Round-trip a shapefile through operations and compare the resulting fields."""
    source_path = os.path.join(self.path_bin, 'shp', 'state_boundaries', 'state_boundaries.shp')
    request = RequestDataset(source_path)
    original_field = request.get()
    converted_path = OcgOperations(dataset=request, output_format='shp').execute()
    round_trip_field = RequestDataset(converted_path).get()
    # The converted field keeps every original key and adds the geometry id header.
    expected_keys = list(original_field.keys()) + [HeaderName.ID_GEOMETRY]
    self.assertAsSetEqual(expected_keys, list(round_trip_field.keys()))
    self.assertEqual((51,), round_trip_field.data_variables[0].shape)
def test(self):
    """Round-trip each test dataset through two netCDF writes and compare the outputs.

    Fix: ``ret2`` is initialized before the second write so the ``finally`` cleanup
    cannot raise NameError (masking the real failure) when ``ops2.execute()`` raises.
    """
    import logbook
    log = logbook.Logger(name='combos', level=logbook.INFO)
    for key, dataset in self.iter_dataset():
        # if key != 'qed_2013_TNn_annual_min': continue
        # these datasets have only one time element
        if key in ('qed_2013_TNn_annual_min',
                   'qed_2013_TasMin_seasonal_max_of_seasonal_means',
                   'qed_2013_climatology_Tas_annual_max_of_annual_means',
                   'qed_2013_maurer02v2_median_txxmmedm_january_1971-2000',
                   'qed_2013_maurer02v2_median_txxmmedm_february_1971-2000',
                   'qed_2013_maurer02v2_median_txxmmedm_march_1971-2000',
                   'snippet_maurer_dtr',
                   'snippet_seasonalbias'):
            slc = None
        else:
            slc = [None, [10, 20], None, None, None]
        # this has different data types on the bounds for the coordinate variables. they currently get
        # casted by the software.
        if key == 'maurer_bcca_1991':
            check_types = False
        else:
            check_types = True
        log.debug('processing: {0} ({1})'.format(key, dataset.__class__.__name__))
        ops = OcgOperations(dataset=dataset, output_format='nc', prefix='nc1', slice=slc)
        try:
            log.debug('initial write...')
            ret1 = ops.execute()
        except ValueError:
            # realization dimensions may not be written to netCDF yet
            if key == 'cmip3_extraction':
                continue
            else:
                raise
        else:
            # Guard: if the second write fails, ret2 would otherwise be unbound in the
            # finally block and the resulting NameError would mask the real error.
            ret2 = None
            try:
                ops2 = OcgOperations(dataset={'uri': ret1}, output_format='nc', prefix='nc2')
                log.debug('second write...')
                ret2 = ops2.execute()
                log.debug('comparing...')
                self.assertNcEqual(ret1, ret2, ignore_attributes={'global': ['history']},
                                   check_types=check_types)
            finally:
                for path in [ret1, ret2]:
                    if path is None:
                        continue
                    folder = os.path.split(path)[0]
                    shutil.rmtree(folder)
        log.debug('success')
def test_disjoint_polygons(self):
    """Test mesh regridding with the source destination containing disjoint polygons.

    Fix: the debug ``print`` used Python 2 statement syntax; converted to the
    function form, which is valid on both Python 2 and 3.
    """
    ESMF.Manager(debug=True)
    self.set_debug(True)
    path_shp = os.path.join(self.path_bin, 'three_polygons', 'three_polygons.shp')
    path_out_nc = self.get_temporary_file_path('ugrid.nc')
    path_source_nc = self.get_temporary_file_path('source.nc')
    mesh_name = 'mesh'
    self.log.debug('creating source netcdf')
    row = np.linspace(-1, 1, 10)
    col = np.linspace(-1, 1, 10)
    self.create_source_netcdf_data(path_source_nc, row=row, col=col)
    ops = OcgOperations(dataset={'uri': path_source_nc}, output_format='shp', snippet=True,
                        prefix='source_shp', dir_output=self.path_current_tmp)
    ops.execute()
    self.log.debug('creating ugrid file: {}'.format(path_out_nc))
    gm = GeometryManager('SPECIAL', path=path_shp)
    geoms = [r['geom'] for r in gm.iter_records()]
    # All disjoint polygons are combined into a single multipart geometry.
    mp = MultiPolygon(geoms)
    # mp = box(-0.25, -0.25, 0.25, 0.25)
    records = [{'geom': mp, 'properties': {'UGID': 123}}]
    gm = GeometryManager('UGID', records=records, allow_multipart=True)
    fm = get_flexible_mesh(gm, mesh_name, False, False)
    fm.save_as_netcdf(path_out_nc, kwargs_dataset={'format': 'NETCDF3_CLASSIC'})
    self.log.debug('getting source field')
    srcgrid = ESMF.Grid(filename=path_source_nc, filetype=ESMF.FileFormat.GRIDSPEC,
                        coord_names=['longitude', 'latitude'], add_corner_stagger=True)
    srcfield = get_field_src(srcgrid, path_source_nc, 'pr')
    self.log.debug('getting destination grid')
    dstgrid = ESMF.Mesh(filename=path_out_nc, filetype=ESMF.FileFormat.UGRID, meshname=mesh_name)
    self.log.debug('getting destination field')
    dstfield = ESMF.Field(dstgrid, "dstfield", meshloc=ESMF.MeshLoc.ELEMENT,
                          ndbounds=[srcfield.data.shape[0]])
    self.log.debug('creating regrid object')
    regrid = ESMF.Regrid(srcfield, dstfield, regrid_method=ESMF.RegridMethod.CONSERVE,
                         unmapped_action=ESMF.UnmappedAction.ERROR)
    # "zero_region" only weighted data will be touched.
    self.log.debug('executing regrid')
    dstfield = regrid(srcfield, dstfield, zero_region=ESMF.Region.SELECT)
    self.assertEqual(dstfield.data.shape, (366, 1))
    print(dstfield.data)
    self.log.debug('success')
def test_system_regrid_field_nonoverlapping_extents(self):
    """Test regridding with fields that do not spatially overlap."""
    rd = self.test_data.get_rd('cancm4_tas')
    # nebraska and california
    coll = OcgOperations(dataset=rd, geom='state_boundaries', select_ugid=[16, 25],
                         snippet=True, vector_wrap=False).execute()
    source = coll.get_element(container_ugid=25)
    destination = coll.get_element(container_ugid=16)
    # Disjoint extents must be rejected rather than silently producing all-masked output.
    with self.assertRaises(RegriddingError):
        from ocgis.regrid.base import regrid_field
        regrid_field(source, destination)
def test_regrid_field_partial_extents(self):
    """Test regridding with fields that partially overlap."""
    rd = self.test_data.get_rd('cancm4_tas')
    # california and nevada
    coll = OcgOperations(dataset=rd, geom='state_boundaries', select_ugid=[23, 25],
                         snippet=True, vector_wrap=False).execute()
    source = coll.get_element(container_ugid=23)
    destination = coll.get_element(container_ugid=25)
    from ocgis.regrid.base import regrid_field
    res = regrid_field(source, destination)
    # Cells outside the overlap remain masked; 11 is the fixture-derived count.
    self.assertEqual(res['tas'].get_mask().sum(), 11)
def test_sql_where_through_operations(self):
    """Test using a SQL where statement to select some geometries.

    Fix: ``list(ret.keys())`` and ``.values()`` replace Python-2-only idioms
    (dict-keys/list equality and ``itervalues()``) so the assertions hold on
    both Python 2 and 3.
    """
    states = ("Wisconsin", "Vermont")
    s = 'STATE_NAME in {0}'.format(states)
    rd = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=rd, geom_select_sql_where=s, geom='state_boundaries', snippet=True)
    ret = ops.execute()
    self.assertEqual(len(ret), 2)
    self.assertEqual(list(ret.keys()), [8, 10])
    for v in ret.properties.values():
        self.assertIn(v['STATE_NAME'], states)
    # make sure the sql select has preference over uid
    ops = OcgOperations(dataset=rd, geom_select_sql_where=s, geom='state_boundaries', snippet=True,
                        geom_select_uid=[500, 600, 700])
    ret = ops.execute()
    self.assertEqual(len(ret), 2)
    for v in ret.properties.values():
        self.assertIn(v['STATE_NAME'], states)
    # test possible interaction with geom_uid
    path = self.get_shapefile_path_with_no_ugid()
    ops = OcgOperations(dataset=rd, geom=path, geom_select_sql_where=s)
    ret = ops.execute()
    self.assertEqual(list(ret.keys()), [1, 2])
    ops = OcgOperations(dataset=rd, geom=path, geom_select_sql_where=s, geom_uid='ID')
    ret = ops.execute()
    self.assertEqual(list(ret.keys()), [13, 15])
def test_time_region(self):
    """A months-[1,2] time_region sum should match manually indexing months 1-2 of the full run."""
    # NOTE(review): hard-coded local Windows paths -- this test only runs on that machine.
    uri = 'C:/testclip/WSI_OCGIS_abdu.1979.nc'
    shp = 'C:/testclip/state.shp'
    rd = RequestDataset(uri=uri)
    calc = [{'func': 'sum', 'name': 'sum'}]
    ops_one = OcgOperations(dataset=rd, output_format='numpy', time_region={'month': [1]},
                            spatial_operation='clip', geom=shp, calc=calc, calc_raw=True,
                            aggregate=True, calc_grouping='day', prefix='calc',
                            geom_select_sql_where='STATE_NAME="Alabama"')
    ret_one_month = ops_one.execute()
    ops_two = OcgOperations(dataset=rd, output_format='numpy', time_region={'month': [2]},
                            spatial_operation='clip', geom=shp, calc=calc, calc_raw=True,
                            aggregate=True, calc_grouping='day', prefix='calc',
                            geom_select_sql_where='STATE_NAME="Alabama"')
    ret_two_month = ops_two.execute()
    ops_original = OcgOperations(dataset=rd, output_format='numpy', time_region={'month': [1, 2]},
                                 spatial_operation='clip', geom=shp, calc=calc, calc_raw=True,
                                 aggregate=True, calc_grouping='day', prefix='calc',
                                 geom_select_sql_where='STATE_NAME="Alabama"')
    ret_original = ops_original.execute()
    desired = ret_original[1]['forcalc'].variables['sum'].value
    # 11.580645161290322
    ops_no_time_region = OcgOperations(dataset=rd, output_format='numpy', spatial_operation='clip',
                                       geom=shp, calc=calc, calc_raw=True, aggregate=True,
                                       calc_grouping='day', prefix='calc',
                                       geom_select_sql_where='STATE_NAME="Alabama"')
    ret_no_time_region = ops_no_time_region.execute()
    field = ret_no_time_region[1]['forcalc']
    # Collect the temporal indices belonging to January and February.
    indices = []
    for idx in range(field.temporal.shape[0]):
        the_time = field.temporal.value_datetime[idx]
        if the_time.month in [1, 2]:
            indices.append(idx)
    var_sub = field.variables['sum'][:, indices, :, :, :]
    actual = var_sub.value
    self.assertNumpyAll(actual, desired)
def test_multipoint_buffering_and_union(self):
    """Test subset behavior using MultiPoint geometries."""
    pts = [Point(3.8, 28.57), Point(9.37, 33.90), Point(17.04, 27.08)]
    mp = MultiPoint(pts)
    rd = self.test_data.get_rd('cancm4_tas')
    # In-memory subset sum...
    coll = OcgOperations(dataset=rd, output_format=constants.OutputFormatName.OCGIS,
                         snippet=True, geom=mp).execute()
    mu1 = coll.get_element(variable_name='tas').get_masked_value().sum()
    # ...must match the sum of the same subset written to netCDF.
    nc_path = OcgOperations(dataset=rd, output_format='nc', snippet=True, geom=mp).execute()
    with self.nc_scope(nc_path) as ds:
        var = ds.variables['tas']
        mu2 = var[:].sum()
    self.assertEqual(mu1, mu2)
def test_execute_directory(self):
    """Test that the output directory is removed appropriately following an operations failure.

    Fix: the unused ``ret`` binding is dropped -- execute() is expected to raise
    ExtentError for the out-of-domain geometry, so its return value is never used.
    """
    kwds = dict(add_auxiliary_files=[True, False])
    rd = self.test_data.get_rd('cancm4_tas')
    # this geometry is outside the domain and will result in an exception
    geom = [1000, 1000, 1100, 1100]
    for k in itr_products_keywords(kwds, as_namedtuple=True):
        ops = OcgOperations(dataset=rd, output_format='csv',
                            add_auxiliary_files=k.add_auxiliary_files, geom=geom)
        try:
            ops.execute()
        except ExtentError:
            # On failure the operation must have cleaned up its output directory.
            contents = os.listdir(self._test_dir)
            self.assertEqual(len(contents), 0)
def test_system_through_operations(self):
    """Test calculation through operations."""
    # Build a minimal in-memory field: 4x3 grid, two time steps.
    row = Variable(name='y', value=[1, 2, 3, 4], dimensions='y')
    col = Variable(name='x', value=[10, 11, 12], dimensions='x')
    grid = Grid(col, row)
    time = TemporalVariable(name='time', value=[1, 2], dimensions='time')
    data = Variable(name='data', dimensions=[time.dimensions[0]] + list(grid.dimensions))
    # First time step all ones, second all twos.
    data.get_value()[0, :] = 1
    data.get_value()[1, :] = 2
    field = Field(grid=grid, time=time, is_data=data)
    calc = [{'func': 'sum', 'name': 'sum'}]
    ops = OcgOperations(dataset=field, calc=calc, calc_grouping='day', calc_raw=True, aggregate=True)
    ret = ops.execute()
    # Spatial sums: 12 cells * 1 and 12 cells * 2.
    actual = ret.get_element(variable_name='sum').get_masked_value().flatten()
    self.assertNumpyAll(actual, np.ma.array([12.0, 24.0]))
def test_full(self):
    """Compute the dissimilarity with all metrics and save an image panel per metric."""
    from flyingpigeon import dissimilarity
    from matplotlib import pyplot as plt
    # 1x1 reference fields and larger candidate fields for two variables.
    p1 = self.write_field_data('v1', ncol=1, nrow=1)
    p2 = self.write_field_data('v2', ncol=1, nrow=1)
    p3 = self.write_field_data('v1', ncol=11, nrow=10, dir='c')
    p4 = self.write_field_data('v2', ncol=11, nrow=10, dir='c')
    ref_range = [dt.datetime(2000, 3, 1), dt.datetime(2000, 3, 31)]
    ref = [ocgis.RequestDataset(p, time_range=ref_range) for p in [p1, p2]]
    reference = ocgis.MultiRequestDataset(ref)
    reference = reference.get()
    cand_range = [dt.datetime(2000, 8, 1), dt.datetime(2000, 8, 31)]
    can = [ocgis.RequestDataset(p, time_range=cand_range) for p in [p3, p4]]
    candidate = ocgis.MultiRequestDataset(can)
    fig, axes = plt.subplots(2, 3)
    # One subplot per dissimilarity metric exported by the package.
    for i, dist in enumerate(dissimilarity.__all__):
        calc = [{'func': 'dissimilarity',
                 'name': 'output_mfpf',
                 'kwds': {'target': reference, 'candidate': ('v1', 'v2'), 'dist': dist}}]
        ops = OcgOperations(dataset=candidate, calc=calc)
        ret = ops.execute()
        out_field = ret.get_element()
        var_name = get_variable_names(out_field.data_variables)[0]
        out = out_field[var_name].get_value()[0, 0]
        axes.flat[i].imshow(out)
        axes.flat[i].set_title(dist)
    path = os.path.join(test_output_path, 'test_spatial_analog_metrics.png')
    plt.savefig(path)
    plt.close()
def test_system_through_operations(self):
    """Multi-request datasets through operations: in-memory and netCDF output paths."""
    mrd = self.get_multirequestdataset()
    ops = OcgOperations(dataset=mrd)
    ret = ops.execute()
    field = ret.get_element()
    actual = get_variable_names(field.data_variables)
    self.assertEqual(actual, self.f_variable_names)
    # Same operation, written to netCDF and read back.
    mrd = self.get_multirequestdataset()
    ops = OcgOperations(dataset=mrd, output_format='nc')
    ret = ops.execute()
    actual_field = RequestDataset(ret).get()
    actual = get_variable_names(actual_field.data_variables)
    self.assertEqual(actual, self.f_variable_names)
    # The two fixture variables differ by a constant offset of one.
    actual_diff = actual_field.data_variables[1].get_value() - actual_field.data_variables[0].get_value()
    self.assertAlmostEqual(actual_diff.mean(), 1.0)
def test_validate_ops(self):
    """Metadata-JSON output must reject multiple request datasets and raw fields."""
    rd = self.test_data.get_rd('cancm4_tas')

    # Only one request dataset is allowed for metadata JSON output.
    duplicate = deepcopy(rd)
    duplicate.alias = 'foo'
    with self.assertRaises(DefinitionValidationError):
        OcgOperations(dataset=[rd, duplicate],
                      output_format=constants.OutputFormatName.METADATA_JSON)

    # Field objects are not convertible to metadata JSON.
    field = rd.get()
    with self.assertRaises(DefinitionValidationError):
        OcgOperations(dataset=field,
                      output_format=constants.OutputFormatName.METADATA_JSON)
def test_execute_directory(self):
    """Test that the output directory is removed appropriately following an operations failure."""
    rd = self.test_data.get_rd('cancm4_tas')
    # This bounding box lies outside the dataset domain, forcing an ExtentError.
    out_of_domain = [1000, 1000, 1100, 1100]
    combos = dict(add_auxiliary_files=[True, False])
    for combo in itr_products_keywords(combos, as_namedtuple=True):
        operations = OcgOperations(dataset=rd, output_format='csv',
                                   add_auxiliary_files=combo.add_auxiliary_files,
                                   geom=out_of_domain)
        try:
            operations.execute()
        except ExtentError:
            # The failed operation must leave no files behind.
            self.assertEqual(len(os.listdir(self.current_dir_output)), 0)
def test_subset_with_shapefile_no_ugid(self):
    """Test a subset operation using a shapefile without a UGID attribute."""
    output_format = [constants.OutputFormatName.OCGIS, constants.OutputFormatName.CSV_SHAPEFILE]
    geom = self.get_shapefile_path_with_no_ugid()
    geom_select_uid = [8, 11]
    geom_uid = 'ID'
    rd = self.test_data.get_rd('cancm4_tas')
    for of in output_format:
        ops = OcgOperations(dataset=rd, geom=geom, geom_select_uid=geom_select_uid,
                            geom_uid=geom_uid, snippet=True, output_format=of)
        # Two selection geometries were requested.
        self.assertEqual(len(ops.geom), 2)
        ret = ops.execute()
        if of == constants.OutputFormatName.OCGIS:
            for element in geom_select_uid:
                self.assertIn(element, ret.children)
            self.assertAsSetEqual(list(ret.properties[8].keys()),
                                  ['STATE_FIPS', 'ID', 'STATE_NAME', 'STATE_ABBR'])
        else:
            # The CSV header must use the custom UID, not the default geometry UID.
            with open(ret) as f:
                reader = DictReader(f)
                row = next(reader)
                self.assertIn(geom_uid, list(row.keys()))
                self.assertNotIn(env.DEFAULT_GEOM_UID, list(row.keys()))
            # Same check on the companion geometry-id shapefile.
            shp_path = os.path.split(ret)[0]
            shp_path = os.path.join(shp_path, 'shp', '{0}_gid.shp'.format(ops.prefix))
            with fiona.open(shp_path) as source:
                record = next(source)
                self.assertIn(geom_uid, record['properties'])
                self.assertNotIn(env.DEFAULT_GEOM_UID, record['properties'])
def test_validate(self):
    """Multivariate function keyword validation: variable aliases must map to real variables."""
    FunctionRegistry.append(MockAbstractMultivariateFunction)
    rd1 = self.test_data.get_rd('cancm4_tas')
    rd1._rename_variable = 'tas2'
    rd2 = deepcopy(rd1)
    rd2._rename_variable = 'pr2'
    # test non-string keyword arguments will not raise an exception
    calc = [{'func': 'fmv', 'name': 'fmv', 'kwds': {'tas': 'tas2', 'pr': 'pr2', 'random': {}}}]
    OcgOperations(dataset=[rd1, rd2], calc=calc)
    # test with an alias map missing
    calc = [{'func': 'fmv', 'name': 'fmv', 'kwds': {'pr': 'pr2', 'random': {}}}]
    with self.assertRaises(DefinitionValidationError):
        OcgOperations(dataset=[rd1, rd2], calc=calc)
    # test with the wrong alias mapped
    calc = [{'func': 'fmv', 'name': 'fmv', 'kwds': {'tas': 'tas2', 'pr': 'pr3', 'random': {}}}]
    with self.assertRaises(DefinitionValidationError):
        OcgOperations(dataset=[rd1, rd2], calc=calc)
def __iter__(self):
    """Iterate parameter combinations, yielding the combination index for each.

    Fixes: (1) removed a stray bare ``raise`` at the top of the exception handler
    that made the ``check_exception`` path unreachable dead code; (2) key/value
    extraction uses ``list(...)`` so it works on Python 3 dict views as well as
    Python 2 lists.
    """
    its = [p().__iter__() for p in self.get_parameters()]
    for ii, values in enumerate(itertools.product(*its)):
        if self.n_only:
            yield (ii)
            continue
        if self.target_combo is not None:
            if self.target_combo > ii:
                continue
        yield (ii)
        kwds = {}
        for val in values:
            # check for environmental parameters: uppercase keys are env overrides
            key = list(val.keys())[0]
            if key.isupper():
                setattr(env, key, list(val.values())[0])
            else:
                kwds.update(val)
        if not self.ops_only:
            kwds.update({'dir_output': tempfile.mkdtemp()})
        try:
            try:
                ops = OcgOperations(**kwds)
                try:
                    self.check_blocked(ops)
                except BlockedCombination:
                    continue
                if self.verbose:
                    print(ii)
                if self.ops_only:
                    pass
                else:
                    ret = ops.execute()
            except Exception as e:
                # check_exception decides whether this failure is expected.
                tb = traceback.format_exc()
                try:
                    self.check_exception(ii, kwds, e, tb)
                except:
                    raise
        finally:
            if not self.ops_only and self.remove_output:
                shutil.rmtree(kwds['dir_output'])
            # Undo any environment overrides applied above.
            env.reset()
def __iter__(self):
    """Iterate parameter combinations, yielding the combination index for each.

    Fixes (same as the formatted twin of this iterator): removed the stray bare
    ``raise`` that short-circuited the ``check_exception`` handling, and made
    dict key/value extraction Python-3 compatible via ``list(...)``.
    """
    its = [p().__iter__() for p in self.get_parameters()]
    for ii, values in enumerate(itertools.product(*its)):
        if self.n_only:
            yield (ii)
            continue
        if self.target_combo is not None:
            if self.target_combo > ii:
                continue
        yield (ii)
        kwds = {}
        for val in values:
            # check for environmental parameters: uppercase keys are env overrides
            key = list(val.keys())[0]
            if key.isupper():
                setattr(env, key, list(val.values())[0])
            else:
                kwds.update(val)
        if not self.ops_only:
            kwds.update({'dir_output': tempfile.mkdtemp()})
        try:
            try:
                ops = OcgOperations(**kwds)
                try:
                    self.check_blocked(ops)
                except BlockedCombination:
                    continue
                if self.verbose:
                    print(ii)
                if self.ops_only:
                    pass
                else:
                    ret = ops.execute()
            except Exception as e:
                # check_exception decides whether this failure is expected.
                tb = traceback.format_exc()
                try:
                    self.check_exception(ii, kwds, e, tb)
                except:
                    raise
        finally:
            if not self.ops_only and self.remove_output:
                shutil.rmtree(kwds['dir_output'])
            # Undo any environment overrides applied above.
            env.reset()
def test_build(self):
    """CSV-Shapefile header construction with and without a custom geometry UID."""
    path = self.get_shapefile_path_with_no_ugid()
    keywords = dict(geom_uid=['ID', None])
    rd = self.test_data.get_rd('cancm4_tas')
    for k in self.iter_product_keywords(keywords):
        if k.geom_uid is None:
            geom_select_uid = None
        else:
            geom_select_uid = [8]
        ops = OcgOperations(dataset=rd, geom=path, geom_uid=k.geom_uid,
                            geom_select_uid=geom_select_uid, snippet=True)
        coll = ops.execute()
        conv = CsvShapefileConverter([coll], outdir=self.current_dir_output, prefix='foo',
                                     overwrite=True, ops=ops)
        ret = conv._build_(coll)
        # Without a custom UID the default geometry UID header is expected.
        if k.geom_uid is None:
            actual = env.DEFAULT_GEOM_UID
        else:
            actual = k.geom_uid
        actual = [constants.HEADERS.ID_DATASET.upper(), actual, constants.HEADERS.ID_GEOMETRY.upper()]
        # NOTE(review): comparing a list to .keys() presumes Python 2 (list return) -- confirm.
        self.assertEqual(actual, ret['fiona_object'].meta['schema']['properties'].keys())
def _handler(self, request, response):
    """Merge the input netCDF resources along time and attach the result to the response.

    Fixes: ``except Exception`` replaces the bare ``except:`` (which also trapped
    SystemExit/KeyboardInterrupt), and ``endswith('.nc')`` replaces the slice
    comparison for the extension check.
    """
    try:
        ocgis.env.DIR_OUTPUT = tempfile.mkdtemp(dir=os.getcwd())
        ocgis.env.OVERWRITE = True
        nc_files = archiveextract(resource=rename_complexinputs(request.inputs['resource']))
        rd = RequestDataset(nc_files)
        # Drop time bounds; they interfere with concatenation of the inputs.
        rd.dimension_map.set_bounds('time', None)
        # Derive the output prefix from the first input file name.
        if nc_files[0].endswith('.nc'):
            out_prefix = nc_files[0][:-3] + '_merged'
        else:
            out_prefix = nc_files[0] + '_merged'
        ops = OcgOperations(dataset=rd, output_format='nc', prefix=out_prefix)
        ret = ops.execute()
        response.outputs['output'].file = ret
        response.outputs['output'].output_format = \
            Format('application/x-netcdf')
        return response
    except Exception:
        # Surface the full traceback in the WPS error response.
        raise Exception(traceback.format_exc())
def test_system_through_operations(self):
    """Field function through operations: attribute propagation, values, and netCDF round-trip."""
    ops = OcgOperations(dataset=self.field_for_test, calc=[{'func': 'mff', 'name': 'my_mff'}])
    ret = ops.execute()
    actual_field = ret.get_element()
    actual_variable = actual_field['my_mff']
    # Function metadata is attached to the output variable.
    self.assertEqual(actual_variable.attrs['long_name'], MockFieldFunction.long_name)
    self.assertEqual(actual_variable.get_value().tolist(), self.desired_value)
    # The input data variable is not carried into the calculation output.
    self.assertNotIn('data', list(actual_field.keys()))
    # Test writing output to netCDF.
    ops = OcgOperations(dataset=self.field_for_test, calc=[{'func': 'mff', 'name': 'my_mff'}],
                        output_format='nc')
    ret = ops.execute()
    actual_field = RequestDataset(ret).get()
    self.assertEqual(actual_field['my_mff'].get_value().tolist(), self.desired_value)
def merge(resource, dir_output=None, historical_concatination=False):
    """Return a list of paths to sorted and merged netCDF files.

    Files are grouped by DSR filename convention and each group with more than one
    member is merged via an OCGIS netCDF write; single-member groups are moved into
    ``dir_output``.

    :param resource: list of netCDF file paths; equal domain and DSR name convention
        are required for merging
    :param dir_output: path to the directory for output (defaults to the current directory)
    :param historical_concatination: concatenate historical files to RCP scenarios
        (default: False)

    Fixes: ``rename`` now uses the full source path (the original passed only the
    basename, which fails unless the file happens to be in the working directory);
    ``is None`` replaces ``== None``; a duplicated ``var`` assignment was removed.
    """
    from os.path import curdir, basename, join
    from os import rename
    import utils
    from ocgis import RequestDataset, OcgOperations

    res_dic = utils.sort_by_filename(resource, historical_concatination=historical_concatination)
    merged_files = []
    if dir_output is None:
        dir_output = curdir
    for key in res_dic:
        if len(res_dic[key]) > 1:
            ncs = res_dic[key]
            # Variable name is the first filename component per DSR convention.
            var = key.split('_')[0]
            rd = RequestDataset(uri=ncs, variable=var)
            ops = OcgOperations(dataset=rd, prefix=key, output_format='nc',
                                dir_output=dir_output, add_auxiliary_files=False)
            m_file = ops.execute()
            merged_files.append(utils.drs_filename(m_file, variable=var))
        else:
            # Nothing to merge: move the single file into the output directory.
            src = res_dic[key][0]
            newname = str(join(dir_output, basename(src)))
            rename(src, newname)
            merged_files.append(newname)
    return merged_files
def test_ocgis_average():
    """The 'average' calc over tasmin/tasmax must equal the elementwise numpy mean."""
    # NOTE(review): [6:] presumably strips a URL scheme prefix from the path -- confirm.
    v1 = TESTDATA['cmip3_tasmin_sresa2_da_nc'][6:]
    v2 = TESTDATA['cmip3_tasmax_sresa2_da_nc'][6:]
    rd1 = RequestDataset(v1)
    rd2 = RequestDataset(v2)
    ops = OcgOperations([rd1, rd2],
                        calc=[{'func': 'average', 'name': 'tas',
                               'kwds': {'v1': 'tasmin', 'v2': 'tasmax'}}])
    ret = ops.execute()
    t = ret.get_element()['tas'][0, :, :].get_value()
    t1 = rd1.get_field()['tasmin'][0, :, :].get_value()
    t2 = rd2.get_field()['tasmax'][0, :, :].get_value()
    aaae(np.mean([t1, t2], axis=0), t)
def test_env_overload(self):
    """Operations should pick up ``env.DIR_OUTPUT`` and ``env.PREFIX`` overloads."""
    out = tempfile.mkdtemp()
    try:
        env.DIR_OUTPUT = out
        env.PREFIX = 'my_prefix'
        rd = self.test_data.get_rd('daymet_tmax')
        ops = OcgOperations(dataset=rd, snippet=True)
        self.assertEqual(env.DIR_OUTPUT, ops.dir_output)
        self.assertEqual(env.PREFIX, ops.prefix)
    finally:
        # BUGFIX: reset the environment even if removing the temporary
        # directory fails; otherwise the overloaded values leak into
        # subsequent tests.
        try:
            os.rmdir(out)
        finally:
            env.reset()
def test_validate_ops(self):
    """Driver validation should reject unsupported output formats and allow 'all'."""
    rd = self.test_data.get_rd('cancm4_tas')
    ops = OcgOperations(dataset=rd)

    # Default driver output formats reject these operations.
    with self.assertRaises(DefinitionValidationError):
        FakeAbstractDriver.validate_ops(ops)

    # With 'all' formats enabled, validation passes; restore the class
    # attribute afterwards so other tests are unaffected.
    original_formats = FakeAbstractDriver.output_formats
    FakeAbstractDriver.output_formats = 'all'
    try:
        FakeAbstractDriver.validate_ops(ops)
    finally:
        FakeAbstractDriver.output_formats = original_formats
def test_calculate_operations(self):
    """Test a 'sum' calculation executed through operations."""
    row = VectorDimension(value=[1, 2, 3, 4])
    col = VectorDimension(value=[10, 11, 12])
    spatial = SpatialDimension(grid=SpatialGridDimension(row=row, col=col))
    temporal = TemporalDimension(value=[1, 2])
    field = Field(spatial=spatial, temporal=temporal)

    # Two time steps: all ones at the first step, all twos at the second.
    values = np.zeros((1, 2, 1, 4, 3), dtype=float)
    values[:, 0, :] = 1
    values[:, 1, :] = 2
    field.variables.add_variable(Variable(value=values, name='data'))

    ops = OcgOperations(dataset=field, calc=[{'func': 'sum', 'name': 'sum'}],
                        calc_grouping='day', calc_raw=True, aggregate=True)
    ret = ops.execute()

    # 12 cells of ones and 12 cells of twos aggregate to 12.0 and 24.0.
    actual = ret[1]['data'].variables['sum'].value.flatten()
    self.assertNumpyAll(actual, np.ma.array([12.0, 24.0]))
def test_get_progress_and_configure_logging(self):
    """A verbose environment should configure logging when progress is built."""
    env.VERBOSE = True
    rd = self.test_data.get_rd('cancm4_tas')
    interp = OcgInterpreter(OcgOperations(dataset=rd))

    # Logging starts unconfigured.
    self.assertIsNone(logging._warnings_showwarning)
    self.assertTrue(ocgis_lh.null)

    env.SUPPRESS_WARNINGS = False
    progress = interp._get_progress_and_configure_logging_(self.current_dir_output, 'foo')

    # The call returns a progress object and activates the log handler
    # plus warning capture.
    self.assertIsInstance(progress, ProgressOcgOperations)
    self.assertFalse(ocgis_lh.null)
    self.assertFalse(logging._warnings_showwarning)
def doCalc(species):
    # Python 2 script-style worker: computes a daily WSI sum for one species
    # over the hunting-season months and writes the clipped result to a
    # shapefile. NOTE(review): paths are hard-coded to a Windows drive --
    # presumably a one-off verification script.
    print 'Working on %s' %(species)

    # Directory holding climate data.
    DATA_DIR = 'G:/WSI data verification/dataverification'

    # Data returns will overwrite in this case. Use with caution!!
    env.OVERWRITE = True
    env.DIR_SHPCABINET = DATA_DIR
    env.DIR_OUTPUT = DATA_DIR

    # Always start with a snippet (if there are no calculations!).
    SNIPPET = False
    #yearstr = str(year)

    # Filename to variable name mapping.
    uri = 'G:/WSI data verification/dataverification/'+ species + '/WSI_OCGIS_'+species+'.1979_2013.nc'
    shp = 'G:/WSI data verification/dataverification/duckzone.shp'

    # RequestDatasetCollection #######################################################
    rdc = RequestDataset(uri, 'forcalc')

    # Return daily sum
    calc = [{'func': 'sum', 'name': 'sum'}]

    ### Write to Shapefile ###########################################################
    prefix = 'WSI_DZ_' + species
    #print('returning shapefile for ' + species)
    # Clip to the duck-zone polygon, aggregate, and group daily sums by
    # day/month/year for months Sep-Apr.
    ops = OcgOperations(dataset=rdc, output_format='shp',
                        time_region={'month': [1,2,3,4,9,10,11,12]},
                        spatial_operation='clip', geom=shp, calc=calc,
                        calc_raw=True, aggregate=True,
                        calc_grouping=['day', 'month', 'year'], prefix=prefix)
    ops.execute()
def test_system_through_operations(self):
    """Test calculation through operations."""
    col = Variable(name='x', value=[10, 11, 12], dimensions='x')
    row = Variable(name='y', value=[1, 2, 3, 4], dimensions='y')
    grid = Grid(col, row)
    time = TemporalVariable(name='time', value=[1, 2], dimensions='time')

    dims = [time.dimensions[0]] + list(grid.dimensions)
    data = Variable(name='data', dimensions=dims)
    # First time step is all ones, the second all twos.
    data.get_value()[0, :] = 1
    data.get_value()[1, :] = 2

    field = Field(grid=grid, time=time, is_data=data)
    ops = OcgOperations(dataset=field, calc=[{'func': 'sum', 'name': 'sum'}],
                        calc_grouping='day', calc_raw=True, aggregate=True)
    ret = ops.execute()

    # 12 cells of ones and 12 cells of twos aggregate to 12.0 and 24.0.
    actual = ret.get_element(variable_name='sum').get_masked_value().flatten()
    self.assertNumpyAll(actual, np.ma.array([12.0, 24.0]))
def test_system_through_operations(self):
    """Run the mock multi-parameter function in-memory and through netCDF output."""
    calc = [{'func': MockMultiParamFunction.key, 'name': 'my_mvp',
             'kwds': self.parms_for_test}]

    # In-memory execution.
    ret = OcgOperations(dataset=self.fields_for_ops_test, calc=calc).execute()
    variable = ret.get_element(variable_name='my_mvp')
    self.assertEqual(variable.get_value().tolist(), self.desired_value)

    # Round-trip through a netCDF file.
    path = OcgOperations(dataset=self.fields_for_ops_test, calc=calc,
                         output_format='nc').execute()
    actual = RequestDataset(path).get()['my_mvp']
    self.assertEqual(actual.get_value().tolist(), self.desired_value)
def get_operations(self):
    """
    :returns: An operations objects created by parsing the query string.
    :rtype: :class:`ocgis.driver.operations.OcgOperations`
    """
    parameter_map = {cls.name: cls for cls in itersubclasses(AbstractParameter)}

    kwds = {Dataset.name: Dataset.from_query(self)}
    for key in self.query_dict:
        try:
            kwds[key] = parameter_map[key].from_query(self)
        except KeyError:
            # Some parameters require arguments different from the parameter name.
            if key in self._expected_missing_keys:
                continue
            raise
    return OcgOperations(**kwds)
def test_system_through_operations(self):
    """Multi-request datasets should round-trip through operations and netCDF."""
    # In-memory execution preserves the expected variable names.
    ops = OcgOperations(dataset=self.get_multirequestdataset())
    field = ops.execute().get_element()
    self.assertEqual(get_variable_names(field.data_variables), self.f_variable_names)

    # Round-trip through netCDF output.
    ops = OcgOperations(dataset=self.get_multirequestdataset(), output_format='nc')
    path = ops.execute()
    actual_field = RequestDataset(path).get()
    self.assertEqual(get_variable_names(actual_field.data_variables),
                     self.f_variable_names)

    # The two data variables differ by a constant offset of one.
    first = actual_field.data_variables[0].get_value()
    second = actual_field.data_variables[1].get_value()
    self.assertAlmostEqual((second - first).mean(), 1.0)
## RequestDatasetCollection #################################################### rdc = RequestDatasetCollection([ RequestDataset(os.path.join(DATA_DIR, uri), var) for uri, var in NCS.iteritems() ]) ## Return In-Memory ############################################################ ## Data is returned as a dictionary with 51 keys (don't forget Puerto Rico...). ## A key in the returned dictionary corresponds to a geometry "ugid" with the ## value of type OcgCollection. ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True, snippet=SNIPPET, geom='state_boundaries') ret = ops.execute() ## Write to Shapefile ########################################################## ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True, snippet=SNIPPET, geom='state_boundaries', output_format='shp') path = ops.execute() ## Write All Data to Keyed Format ##############################################
def call(resource=[], variable=None, dimension_map=None, calc=None, calc_grouping= None, conform_units_to=None,
         memory_limit=None, prefix=None, geom=None, output_format_options=False, search_radius_mult=2.,
         select_nearest=False, select_ugid=None, time_region=None, time_range=None, dir_output=None,
         output_format='nc'):
    '''
    ocgis operation call

    :param resource: input netCDF file(s)
    :param variable: variable in the input file to be picked
    :param dimension_map: dimension map in case of unconventional storage of data
    :param calc: ocgis calc syntax for a calculation operation
    :param calc_grouping: time aggregate grouping
    :param conform_units_to: units to conform the data to
    :param memory_limit: limit the amount of data to be loaded into memory at once;
                         if None (default) free memory is detected by birdhouse
    :param prefix: prefix for the output file name
    :param geom: name of a shapefile stored in the birdhouse shape cabinet
    :param output_format_options: output options for netCDF, e.g. compression level
    :param search_radius_mult: search radius for point geometries; all included
                               gridboxes will be returned
    :param select_nearest: nearest neighbour selection for point geometries
    :param select_ugid: ugid for the appropriate polygons
    :param time_region: time region subset
    :param time_range: sequence of two datetime.datetime objects to mark start and end point
    :param dir_output: output directory
    :param output_format: output format (default: 'nc')

    :return: output file path
    '''
    print 'start ocgis module'
    logger.info('Start ocgis module call function')
    from ocgis import OcgOperations, RequestDataset , env
    from ocgis.util.large_array import compute

    # prepare the environment
    env.DIR_SHPCABINET = DIR_SHP
    env.OVERWRITE = True
    env.DIR_OUTPUT = dir_output
    env.PREFIX = prefix

    # Normalize output_format_options: False -> None, True -> a default
    # compressed-NETCDF4 option set; anything else is passed through as-is.
    if output_format_options == False:
        output_format_options = None
    elif output_format_options == True:
        output_format_options={'data_model': 'NETCDF4',  # NETCDF4_CLASSIC
                               'variable_kwargs': {'zlib': True, 'complevel': 9}}
    else:
        logger.info('output_format_options are set to %s ' % ( output_format_options ))

    # Accept a single file path as well as a list of paths.
    if type(resource) != list:
        resource = list([resource])

    # execute ocgis
    logger.info('Execute ocgis module call function')
    try:
        rd = RequestDataset(resource, variable=variable, dimension_map=dimension_map,
                            conform_units_to=conform_units_to, time_region=time_region)
        ops = OcgOperations(dataset=rd,
                            output_format_options=output_format_options,
                            #options=options,
                            calc=calc,
                            calc_grouping=calc_grouping,
                            geom=geom,
                            output_format=output_format,
                            search_radius_mult=search_radius_mult,
                            select_nearest=select_nearest,
                            select_ugid=select_ugid,
                            add_auxiliary_files=False)
        logger.info('OcgOperations set')
    except Exception as e:
        logger.debug('failed to setup OcgOperations')
        raise

    # check memory load
    from numpy import sqrt
    from flyingpigeon.utils import FreeMemory

    if memory_limit == None:
        f = FreeMemory()
        mem_kb = f.user_free
        mem_mb = mem_kb / 1024.
        mem_limit = mem_mb / 2.  # set limit to half of the free memory
    else:
        mem_limit = memory_limit

    # Cap the limit at 4 GiB regardless of detected memory.
    if mem_limit >= 1024. * 4:
        mem_limit = 1024. * 4
        # 475.0 MB for openDAP

    data_kb = ops.get_base_request_size()['total']
    data_mb = data_kb / 1024.

    if variable == None:
        variable = rd.variable
        logger.info('%s as variable dedected' % (variable))

    #data_kb = size['total']/reduce(lambda x,y: x*y,size['variables'][variable]['value']['shape'])
    logger.info('data_mb = %s ; memory_limit = %s ' % (data_mb , mem_limit ))

    if data_mb <= mem_limit :  # input is smaler than the half of free memory size
        # Small enough: execute in one shot.
        logger.info('ocgis module call as ops.execute()')
        try:
            geom_file = ops.execute()
        except Exception as e:
            logger.debug('failed to execute ocgis operation')
            raise
    else:
        # Too large for memory: derive a tile dimension from the per-element
        # size and run a chunked execution with ocgis' compute().
        size = ops.get_base_request_size()
        nb_time_coordinates_rd = size['variables'][variable]['temporal']['shape'][0]
        element_in_kb = size['total']/reduce(lambda x,y: x*y,size['variables'][variable]['value']['shape'])
        element_in_mb = element_in_kb / 1024.
        tile_dim = sqrt(mem_limit/(element_in_mb*nb_time_coordinates_rd))  # maximum chunk size

        # calcultion of chunk size
        try:
            logger.info('tile_dim = %s; calc = %s ' % (tile_dim, calc))
            if calc == None:
                # compute() requires a calculation; fall back to an identity calc.
                calc = '%s=%s*1' % (variable, variable)
                logger.info('calc set to = %s ' % calc)
            ops = OcgOperations(dataset=rd,
                                output_format_options=output_format_options,
                                calc=calc,
                                output_format=output_format,  # 'nc' is necessary for chunked execution
                                select_ugid=select_ugid,
                                geom=geom,
                                add_auxiliary_files=False)
            geom_file = compute(ops, tile_dimension=int(tile_dim) , verbose=True)
        except Exception as e:
            logger.debug('failed to compute ocgis operation')
            raise

    logger.info('Succeeded with ocgis module call function')
    return geom_file
# Compute a custom percentile basis using ICCLIM. # Path to CF climate dataset. This examples uses the same file for indice and percentile basis calculation. in_file = '/path/to/cf_data.nc' # Subset the input dataset to return the desired base period for the percentile basis. variable = 'tas' years = range(2001, 2003) # A custom date range may be required for your data time_region = {'year': years} rd = RequestDataset(uri=in_file, variable=variable) field = rd.create_field() field = field.time.get_time_region(time_region).parent # Calculate the percentile basis. The data values must be a three-dimensional array. arr = field[variable].get_masked_value().squeeze() # This is the field data to use for the calculation dt_arr = field.temporal.value_datetime # This is an array of datetime objects. percentile = 90 window_width = 5 t_calendar, t_units = field.time.calendar, field.time.units # ICCLIM requires calendar and units for the calculation percentile_dict = IcclimTG90p.get_percentile_dict(arr, dt_arr, percentile, window_width, t_calendar, t_units) ######################################################################################################################## # Calculate indice using custom percentile basis. # Depending on the size of the data, this computation may take some time... calc = [{'func': 'icclim_TG90p', 'name': 'TG90p', 'kwds': {'percentile_dict': percentile_dict}}] calc_grouping = 'month' # Returns data as an in-memory spatial collection ops = OcgOperations(dataset=rd, calc=calc, calc_grouping=calc_grouping) coll = ops.execute()
SNIPPET = True # Data returns will overwrite in this case. Use with caution!! env.OVERWRITE = True # RequestDatasetCollection ############################################################################################# rdc = RequestDatasetCollection([RequestDataset( os.path.join(DATA_DIR, uri), var) for uri, var in NCS.iteritems()]) # Return In-Memory ##################################################################################################### # Data is returned as a dictionary-like object (SpatialCollection) with 51 keys (don't forget Puerto Rico...). A key in # the returned dictionary corresponds to a geometry "ugid" with the value of type OcgCollection. print('returning numpy...') ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True, snippet=SNIPPET, geom='state_boundaries') ret = ops.execute() # Return a SpatialCollection, but only for a target state in a U.S. state boundaries shapefile. In this case, the UGID # attribute value of 23 is associated with Nebraska. print('returning numpy for a state...') ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True, snippet=SNIPPET, geom='state_boundaries', geom_select_uid=[23]) ret = ops.execute() # Write to Shapefile ################################################################################################### print('returning shapefile...') ops = OcgOperations(dataset=rdc, spatial_operation='clip', aggregate=True, snippet=SNIPPET, geom='state_boundaries', output_format='shp')
def test_combinatorial_projection_with_geometries(self):
    """Exercise the full combinatorial space of aggregation, spatial operation,
    CRS, output format, abstraction, geometry, calc, and dataset options,
    asserting the CRS and geometry type of the written shapefiles."""

    # self.get_ret(kwds={'output_format':'shp','prefix':'as_polygon'})
    # self.get_ret(kwds={'output_format':'shp','prefix':'as_point','abstraction':'point'})

    # Four hand-drawn polygons used as subset geometries.
    features = [
        {'NAME': 'a',
         'wkt': 'POLYGON((-105.020430 40.073118,-105.810753 39.327957,-105.660215 38.831183,-104.907527 38.763441,-104.004301 38.816129,-103.643011 39.802151,-103.643011 39.802151,-103.643011 39.802151,-103.643011 39.802151,-103.959140 40.118280,-103.959140 40.118280,-103.959140 40.118280,-103.959140 40.118280,-104.327957 40.201075,-104.327957 40.201075,-105.020430 40.073118))'},
        {'NAME': 'b',
         'wkt': 'POLYGON((-102.212903 39.004301,-102.905376 38.906452,-103.311828 37.694624,-103.326882 37.295699,-103.898925 37.220430,-103.846237 36.746237,-102.619355 37.107527,-102.634409 37.724731,-101.874194 37.882796,-102.212903 39.004301))'},
        {'NAME': 'c',
         'wkt': 'POLYGON((-105.336559 37.175269,-104.945161 37.303226,-104.726882 37.175269,-104.696774 36.844086,-105.043011 36.693548,-105.283871 36.640860,-105.336559 37.175269))'},
        {'NAME': 'd',
         'wkt': 'POLYGON((-102.318280 39.741935,-103.650538 39.779570,-103.620430 39.448387,-103.349462 39.433333,-103.078495 39.606452,-102.325806 39.613978,-102.325806 39.613978,-102.333333 39.741935,-102.318280 39.741935))'},
    ]

    # Write a polygon and a point (centroid) shapefile for the features.
    for filename in ['polygon', 'point']:
        if filename == 'point':
            geometry = 'Point'
            to_write = deepcopy(features)
            for feature in to_write:
                geom = wkt.loads(feature['wkt'])
                feature['wkt'] = geom.centroid.wkt
        else:
            to_write = features
            geometry = 'Polygon'
        path = os.path.join(self.current_dir_output, 'ab_{0}.shp'.format(filename))
        with FionaMaker(path, geometry=geometry) as fm:
            fm.write(to_write)

    # Synthetic netCDF sources without bounds and without a level dimension.
    no_bounds_nc = SimpleNcNoBounds()
    no_bounds_nc.write()
    no_bounds_uri = os.path.join(env.DIR_OUTPUT, no_bounds_nc.filename)
    no_level_nc = SimpleNcNoLevel()
    no_level_nc.write()
    no_level_uri = os.path.join(env.DIR_OUTPUT, no_level_nc.filename)

    ocgis.env.DIR_SHPCABINET = self.current_dir_output
    # ocgis.env.DEBUG = True
    # ocgis.env.VERBOSE = True

    # Parameter space for the combinatorial run.
    aggregate = [
        False,
        True
    ]
    spatial_operation = [
        'intersects',
        'clip'
    ]
    epsg = [
        2163,
        4326,
        None
    ]
    output_format = [
        constants.OUTPUT_FORMAT_NETCDF,
        constants.OUTPUT_FORMAT_SHAPEFILE,
        constants.OUTPUT_FORMAT_CSV_SHAPEFILE
    ]
    abstraction = [
        'polygon',
        'point',
        None
    ]
    dataset = [
        self.get_dataset(),
        {'uri': no_bounds_uri, 'variable': 'foo'},
        {'uri': no_level_uri, 'variable': 'foo'}
    ]
    geom = [
        'ab_polygon',
        'ab_point'
    ]
    calc = [
        None,
        [{'func': 'mean', 'name': 'my_mean'}]
    ]
    calc_grouping = ['month']

    args = (aggregate, spatial_operation, epsg, output_format, abstraction, geom, calc, dataset)
    for ii, tup in enumerate(itertools.product(*args)):
        a, s, e, o, ab, g, c, d = tup

        # The no-bounds dataset cannot support some operations; flag it.
        if os.path.split(d['uri'])[1] == 'test_simple_spatial_no_bounds_01.nc':
            unbounded = True
        else:
            unbounded = False

        # netCDF output with EPSG 4326 uses the CF-specific WGS84 CRS object.
        if o == constants.OUTPUT_FORMAT_NETCDF and e == 4326:
            output_crs = CFWGS84()
        else:
            output_crs = CoordinateReferenceSystem(epsg=e) if e is not None else None

        kwds = dict(aggregate=a, spatial_operation=s, output_format=o, output_crs=output_crs, geom=g,
                    abstraction=ab, dataset=d, prefix=str(ii), calc=c, calc_grouping=calc_grouping)

        try:
            ops = OcgOperations(**kwds)
            ret = ops.execute()
        except DefinitionValidationError:
            # netCDF output only supports a subset of CRS/operation choices.
            if o == constants.OUTPUT_FORMAT_NETCDF:
                if e not in [4326, None]:
                    continue
                if s == 'clip':
                    continue
            else:
                raise
        except ExtentError:
            # Expected for unbounded sources or point abstractions that miss
            # the subset geometry.
            if unbounded or ab == 'point':
                continue
            else:
                raise
        except ValueError:
            # Polygon abstraction is invalid for the unbounded source.
            if unbounded and ab == 'polygon':
                continue

        # Locate the user-geometry shapefile written beside the output.
        if o == constants.OUTPUT_FORMAT_SHAPEFILE:
            ugid_path = os.path.join(self.current_dir_output, ops.prefix, ops.prefix + '_ugid.shp')
        else:
            ugid_path = os.path.join(self.current_dir_output, ops.prefix, constants.OUTPUT_FORMAT_SHAPEFILE,
                                     ops.prefix + '_ugid.shp')

        # Non-netCDF outputs: the ugid shapefile must carry the requested CRS
        # (or the WGS84 default when none was requested).
        if o != constants.OUTPUT_FORMAT_NETCDF:
            with fiona.open(ugid_path, 'r') as f:
                if e:
                    second = output_crs
                else:
                    second = CoordinateReferenceSystem(epsg=4326)
                self.assertEqual(CoordinateReferenceSystem(value=f.meta['crs']), second)

        # Shapefile outputs: the geometry type must match the abstraction.
        if o == constants.OUTPUT_FORMAT_SHAPEFILE:
            with fiona.open(ret, 'r') as f:
                if a and ab == 'point':
                    second = 'MultiPoint'
                elif ab is None:
                    field = RequestDataset(uri=d['uri'], variable='foo').get()
                    second = field.spatial.geom.get_highest_order_abstraction().geom_type
                else:
                    second = ab.title()
                # Single- and multi-part variants are both acceptable.
                if second in ['Polygon', 'MultiPolygon']:
                    second = ['Polygon', 'MultiPolygon']
                elif second in ['Point', 'MultiPoint']:
                    second = ['Point', 'MultiPoint']
                self.assertTrue(f.meta['schema']['geometry'] in second)
def test_calc_sample_size(self):
    """Exercise sample-size ('n_*') outputs across a (partially commented-out)
    combinatorial parameter space; netCDF and CSV outputs are checked for the
    expected sample-size variables/aliases."""
    rd1 = self.get_dataset()
    rd1['alias'] = 'var1'
    rd2 = self.get_dataset()
    rd2['alias'] = 'var2'

    # NOTE(review): the commented entries below deliberately narrow the
    # combinatorial space; they are preserved for re-enabling.
    dataset = [
        # RequestDatasetCollection([rd1]),
        RequestDatasetCollection([rd1,rd2])
    ]
    calc_sample_size = [
        True,
        # False
    ]
    calc = [
        [{'func':'mean','name':'mean'},{'func':'max','name':'max'}],
        # [{'func':'ln','name':'ln'}],
        # None,
        # [{'func':'divide','name':'divide','kwds':{'arr1':'var1','arr2':'var2'}}]
    ]
    calc_grouping = [
        # None,
        ['month'],
        # ['month','year']
    ]
    output_format = ['numpy']

    for ii,tup in enumerate(itertools.product(dataset,calc_sample_size,calc,calc_grouping,output_format)):
        kwds = dict(zip(['dataset','calc_sample_size','calc','calc_grouping','output_format'],tup))
        kwds['prefix'] = str(ii)

        try:
            ops = OcgOperations(**kwds)
        except DefinitionValidationError:
            # Some combinations are invalid by design; skip those and re-raise
            # anything unexpected.
            if kwds['calc'] is not None:
                ## set functions require a temporal grouping otherwise the calculation
                ## is meaningless
                if kwds['calc'][0]['func'] == 'mean' and kwds['calc_grouping'] is None:
                    continue
                ## multivariate functions may not implemented with sample size = True
                elif kwds['calc_sample_size'] and kwds['calc'][0]['func'] == 'divide':
                    continue
                ## multivariate functions require the correct number of variables
                elif kwds['calc'][0]['func'] == 'divide' and len(kwds['dataset']) == 1:
                    continue
                ## only one request dataset may be written to netCDF at this time
                elif kwds['output_format'] == 'nc' and len(kwds['dataset']) == 2:
                    continue
                else:
                    raise
            ## only one request dataset may be written to netCDF at this time
            elif kwds['output_format'] == 'nc' and len(ops.dataset) == 2:
                continue
            else:
                raise

        ret = ops.execute()

        # netCDF output: two 'n_*' sample-size variables with known mean.
        if kwds['output_format'] == 'nc':
            if kwds['calc_sample_size'] and kwds['calc_grouping']:
                if kwds['calc'] is not None and kwds['calc'][0]['func'] == 'mean':
                    with self.nc_scope(ret) as ds:
                        self.assertEqual(sum([v.startswith('n_') for v in ds.variables.keys()]),2)
                        self.assertEqual(ds.variables['n_max'][:].mean(),30.5)

        # CSV output: the CALC_ALIAS column carries per-variable aliases,
        # suffixed when more than one dataset is present.
        if kwds['output_format'] == 'csv':
            if kwds['calc'] is not None and kwds['calc'][0]['func'] == 'mean':
                with open(ret,'r') as f:
                    reader = DictReader(f)
                    alias_set = set([row['CALC_ALIAS'] for row in reader])
                if len(kwds['dataset']) == 1:
                    if kwds['calc_sample_size']:
                        self.assertEqual(alias_set,set(['max','n_max','n_mean','mean']))
                    else:
                        self.assertEqual(alias_set,set(['max','mean']))
                else:
                    if kwds['calc_sample_size']:
                        self.assertEqual(alias_set,set(['max_var1','n_max_var1','n_mean_var1','mean_var1',
                                                        'max_var2','n_max_var2','n_mean_var2','mean_var2']))
                    else:
                        self.assertEqual(alias_set,set(['max_var1','mean_var1',
                                                        'max_var2','mean_var2']))
## Data returns will overwrite in this case. Use with caution!! env.OVERWRITE = True env.DIR_SHPCABINET = '/Users/ryan.okuinghttons/netCDFfiles/shapefiles/ocgis_data/shp' ## RequestDatasetCollection #################################################### rdc = RequestDatasetCollection([RequestDataset(os.path.join(DATA_DIR,NCS),'tas')]) ## Return In-Memory ############################################################ ## Data is returned as a dictionary with 51 keys (don't forget Puerto Rico...). ## A key in the returned dictionary corresponds to a geometry "ugid" with the ## value of type OcgCollection. print('returning numpy...') ops = OcgOperations(dataset=rdc,spatial_operation='clip',aggregate=True, snippet=SNIPPET,geom='state_boundaries') path = ops.execute() ## Write to Shapefile ########################################################## print('returning shapefile...') ops = OcgOperations(dataset=rdc,spatial_operation='clip',aggregate=True, snippet=SNIPPET,geom='state_boundaries',output_format='shp') path = ops.execute() ## Write All Data to Keyed Format ############################################## ## Without the snippet, we are writing all data to the linked CSV-Shapefile ## output format. The operation will take considerably longer. print('returning csv+...') ops = OcgOperations(dataset=rdc,spatial_operation='clip',aggregate=True,
def get_segetalflora(
    resource=[], dir_output=".", culture_type="fallow", climate_type=2, region=None, dimension_map=None
):
    """Productive worker for segetalflora jobs.

    Clips the input tas files to Europe with a yearly mean, then evaluates a
    segetalflora equation per culture/climate type and emits netCDF plus ascii
    table outputs.

    :param resource: list of tas netCDF files. (Any time aggregation is possible)
    :param culture_type: Type of culture. Possible values are:
                         'fallow', 'intensive', 'extensive' (default:'fallow')
    :param climate_type: Type of climate: number 1 to 7 or 'all' (default: 2)
    :param region: Region for subset. If 'None' (default), the values will be
                   calculated for Europe
    :returns: list of produced output prefixes
    """
    from flyingpigeon.subset import clipping
    from flyingpigeon.utils import calc_grouping, sort_by_filename
    import os
    from os import remove
    from tempfile import mkstemp
    from ocgis import RequestDataset, OcgOperations
    from cdo import Cdo

    cdo = Cdo()

    # All outputs are written relative to dir_output.
    if not os.path.exists(dir_output):
        os.makedirs(dir_output)
    os.chdir(dir_output)
    # outputs = []

    if region == None:
        region = "Europe"

    # Normalize scalar arguments to lists.
    if not type(culture_type) == list:
        culture_type = list([culture_type])
    if not type(climate_type) == list:
        climate_type = list([climate_type])

    ncs = sort_by_filename(resource)
    print "%s experiments found" % (len(ncs))
    print "keys: %s " % (ncs.keys())

    # generate outfolder structure:
    dir_netCDF = "netCDF"
    dir_ascii = "ascii"
    dir_netCDF_tas = dir_netCDF + "/tas"
    dir_ascii_tas = dir_ascii + "/tas"

    if not os.path.exists(dir_netCDF):
        os.makedirs(dir_netCDF)
    if not os.path.exists(dir_ascii):
        os.makedirs(dir_ascii)
    if not os.path.exists(dir_netCDF_tas):
        os.makedirs(dir_netCDF_tas)
    if not os.path.exists(dir_ascii_tas):
        os.makedirs(dir_ascii_tas)

    # Stage 1: clip each experiment to Europe with a yearly mean and dump an
    # ascii table of the clipped tas data.
    tas_files = []
    for key in ncs.keys():
        try:
            print "process %s" % (key)
            calc = [{"func": "mean", "name": "tas"}]
            calc_group = calc_grouping("yr")
            # Replace the time-frequency token of the DSR filename with 'yr'.
            prefix = key.replace(key.split("_")[7], "yr")
            if not os.path.exists(os.path.join(dir_netCDF_tas, prefix + ".nc")):
                nc_tas = clipping(
                    resource=ncs[key],
                    variable="tas",
                    calc=calc,
                    dimension_map=dimension_map,
                    calc_grouping=calc_group,
                    prefix=prefix,
                    polygons="Europe",
                    dir_output=dir_netCDF_tas,
                )[0]
                print "clipping done for %s" % (key)
                if os.path.exists(os.path.join(dir_netCDF_tas, prefix + ".nc")):
                    tas_files.append(prefix)
                else:
                    print "clipping failed for %s: No output file exists" % (key)
            else:
                print "netCDF file already exists %s" % (key)
                nc_tas = os.path.join(dir_netCDF_tas, prefix + ".nc")
        except Exception as e:
            print "clipping failed for %s: %s" % (key, e)
        try:
            asc_tas = os.path.join(dir_ascii_tas, prefix + ".asc")
            if not os.path.exists(asc_tas):
                f, tmp = mkstemp(dir=os.curdir, suffix=".asc")
                tmp = tmp.replace(os.path.abspath(os.curdir), ".")
                # cdo.outputtab('name,date,lon,lat,value', input = nc_tas , output = tmp)
                cmd = "cdo outputtab,name,date,lon,lat,value %s > %s" % (nc_tas, tmp)
                print cmd
                os.system(cmd)
                print ("tanslation to ascii done")
                remove_rows(tmp, asc_tas)
                remove(tmp)
                print ("rows with missing values removed")
            else:
                print ("tas ascii already exists")
            plot_ascii(asc_tas)
        except Exception as e:
            print "translation to ascii failed %s: %s" % (key, e)
            if os.path.exists(tmp):
                remove(tmp)

    # Stage 2: evaluate the segetalflora equation for every produced tas file
    # and every culture/climate combination.
    tas_files = [os.path.join(dir_netCDF_tas, nc) for nc in os.listdir(dir_netCDF_tas)]
    outputs = []
    for name in tas_files:
        for cult in culture_type:
            for climat in climate_type:
                try:
                    calc = get_equation(culture_type=cult, climate_type=climat)
                    # NOTE(review): `type(calc) != None` is always True -- this
                    # presumably intended `calc is not None`; confirm before changing.
                    if type(calc) != None:
                        try:
                            var = "sf%s%s" % (cult, climat)
                            prefix = os.path.basename(name).replace("tas", var).strip(".nc")
                            infile = name  # os.path.join(dir_netCDF_tas,name+'.nc')
                            dir_sf = os.path.join(dir_netCDF, var)
                            if not os.path.exists(dir_sf):
                                os.makedirs(dir_sf)
                            if os.path.exists(os.path.join(dir_sf, prefix + ".nc")):
                                nc_sf = os.path.join(dir_sf, prefix + ".nc")
                                print "netCDF file already exists: %s %s " % (dir_sf, prefix)
                            else:
                                rd = RequestDataset(name, variable="tas", dimension_map=dimension_map)
                                op = OcgOperations(
                                    dataset=rd,
                                    calc=calc,
                                    prefix=prefix,
                                    output_format="nc",
                                    dir_output=dir_sf,
                                    add_auxiliary_files=False,
                                )
                                nc_sf = op.execute()
                                print "segetalflora done for %s" % (prefix)
                                outputs.append(prefix)
                            dir_ascii_sf = os.path.join(dir_ascii, var)
                            if not os.path.exists(dir_ascii_sf):
                                os.makedirs(dir_ascii_sf)
                            asc_sf = os.path.join(dir_ascii_sf, prefix + ".asc")
                            if not os.path.exists(asc_sf):
                                f, tmp = mkstemp(dir=os.curdir, suffix=".asc")
                                tmp = tmp.replace(os.path.abspath(os.curdir), ".")
                                # cdo.outputtab('name,date,lon,lat,value', input = nc_sf , output = tmp)
                                cmd = "cdo outputtab,name,date,lon,lat,value %s > %s" % (nc_sf, tmp)
                                os.system(cmd)
                                print ("translation to ascii done")
                                remove_rows(tmp, asc_sf)
                                remove(tmp)
                                print ("rows with missing values removed")
                            else:
                                print "ascii file already exists"
                            plot_ascii(asc_sf)
                        except Exception as e:
                            print "failed for ascii file: %s %s " % (name, e)
                            if os.path.exists(tmp):
                                remove(tmp)
                    else:
                        print "NO EQUATION found for %s %s " % (cult, climat)
                except Exception as e:
                    print "Segetal flora failed: %s" % (e)
    return outputs