def test_SpatialCoordinateVariables_add_to_dataset():
    """add_to_dataset writes each coordinate as a matching dimension and variable."""
    lat = SpatialCoordinateVariable(numpy.arange(19, -1, -1))
    lon = SpatialCoordinateVariable(numpy.arange(10))
    coords = SpatialCoordinateVariables(lon, lat, Proj(init='EPSG:4326'))

    lat_varname = 'lat'
    lon_varname = 'lon'
    # (name in the file, coordinate it was written from), checked in this order.
    expected = ((lat_varname, lat), (lon_varname, lon))

    outfilename = 'test.nc'
    try:
        with Dataset(outfilename, 'w') as target_ds:
            coords.add_to_dataset(target_ds, lon_varname, lat_varname)

            for varname, coordinate in expected:
                assert varname in target_ds.dimensions
                assert varname in target_ds.variables
                assert len(target_ds.dimensions[varname]) == coordinate.values.size
                assert numpy.array_equal(coordinate.values, target_ds.variables[varname][:])
    finally:
        # Remove the scratch file even when an assertion above fails.
        if os.path.exists(outfilename):
            os.remove(outfilename)
def test_SpatialCoordinateVariables_slice_by_bbox():
    """slice_by_bbox returns only the coordinates selected by the bounding box."""
    lat = SpatialCoordinateVariable(numpy.arange(19, -1, -1))
    lon = SpatialCoordinateVariable(numpy.arange(10))
    proj = Proj(init='EPSG:4326')
    coords = SpatialCoordinateVariables(lon, lat, proj)

    query = BBox((1.75, 3.7, 6.2, 16.7), proj)
    subset = coords.slice_by_bbox(query)
    x_values = subset.x.values
    y_values = subset.y.values

    assert numpy.array_equal(x_values, numpy.arange(2, 6))
    assert x_values[0] == 2
    assert x_values[-1] == 5
    # y was built in descending order, so the subset runs from 16 down to 4.
    assert y_values[0] == 16
    assert y_values[-1] == 4
def load_region(self, region_name):
    """Make `region_name` the active region, replacing any region loaded before.

    Does nothing when `region_name` is already the current region. Otherwise
    calls ``self.close()`` and then opens the dataset of the service named
    ``"<region_name>_dem"``, storing the dataset, its spatial coordinates,
    its first variable's data and that variable's ``_FillValue`` on ``self``.

    Parameters
    ----------
    region_name : str
    """
    if region_name == self.region:
        return

    self.close()

    service_name = "{}_dem".format(region_name)
    dem_service = Service.objects.get(name=service_name)
    dem_variable = dem_service.variable_set.first()

    self.dataset = Dataset(DATA_PATH / dem_service.data_path)
    self.coords = SpatialCoordinateVariables.from_dataset(
        self.dataset,
        x_name=dem_variable.x_dimension,
        y_name=dem_variable.y_dimension,
        projection=Proj(dem_service.projection),
    )

    self.data = self.dataset.variables[dem_variable.name]
    # Turn off netCDF4 auto-masking on this variable.
    self.data.set_auto_mask(False)
    self.nodata_value = self.data._FillValue

    # Record the region last so the guard above only matches a fully loaded region.
    self.region = region_name
def get_results_image(self, bounds, size, single_color, kept_colors, gained_colors, species, historic, futures):
    """Render a presence image for `species`, optionally compared against future grids.

    The historic grid is read from the service ``'<species>_p<historic>_800m_pa'``
    and each future grid from ``'<species>_15gcm_<future>_pa'``, all cut to the
    window that `bounds` (a WGS84 box) covers in the variable's native projection.

    With no `futures`, cells equal to 1 are drawn in `single_color`. Otherwise
    the output value encodes how many future grids agree: ``count + 1`` where the
    historic grid is 1 ("kept"), and ``count + len(kept_colors) + 1`` where the
    historic grid is 0 ("gained"); masked cells are written as 0.

    Returns the ``.image`` of the rendered picture after warping it from the
    native extent to `bounds` at `size`.
    """
    class_count = len(futures) + 1
    kept_colors = self.get_colors(kept_colors, class_count)
    gained_colors = self.get_colors(gained_colors, class_count)

    extent = BBox(bounds, projection=WGS84)
    self.service = Service.objects.get(name='{}_p{}_800m_pa'.format(species, historic))
    variable = self.service.variable_set.all().first()
    native_extent = extent.project(Proj(str(variable.projection)))

    grid_dimensions = self.get_grid_spatial_dimensions(variable)
    coords = SpatialCoordinateVariables.from_bbox(variable.full_extent, *grid_dimensions)
    x_slice = coords.x.indices_for_range(native_extent.xmin, native_extent.xmax)
    y_slice = coords.y.indices_for_range(native_extent.ymin, native_extent.ymax)

    historic_data = self.get_grid_for_variable(variable, x_slice=x_slice, y_slice=y_slice)
    self.close_dataset()

    if futures:
        grids = []
        for future in futures:
            self.service = Service.objects.get(name='{}_15gcm_{}_pa'.format(species, future))
            variable = self.service.variable_set.all().first()
            grids.append(self.get_grid_for_variable(variable, x_slice=x_slice, y_slice=y_slice))
            self.close_dataset()

        # Per-cell count of future grids reporting presence.
        future_data = sum(grids)
        del grids

        data = numpy.zeros_like(historic_data, numpy.uint8)
        gained_offset = len(kept_colors) + 1

        data[historic_data == 1] = 1

        kept_cells = (historic_data == 1) & (future_data > 0)
        data[kept_cells] = future_data[kept_cells] + 1

        gained_cells = (historic_data == 0) & (future_data > 0)
        data[gained_cells] = future_data[gained_cells] + gained_offset

        data[data.mask == 1] = 0

        # Only emit legend classes for values that actually occur in the grid.
        present_values = numpy.unique(data)
        kept_classes = [
            (value, Color.from_hex(color))
            for value, color in enumerate(kept_colors, start=1)
            if value in present_values
        ]
        gained_classes = [
            (value, Color.from_hex(color))
            for value, color in enumerate(gained_colors, start=gained_offset)
            if value in present_values
        ]
        renderer = UniqueValuesRenderer(kept_classes + gained_classes, fill_value=0)
    else:
        data = historic_data
        renderer = UniqueValuesRenderer([(1, Color.from_hex(single_color))], fill_value=0)

    image = renderer.render_image(data.data).convert('RGBA')
    warped = GeoImage(image, native_extent).warp(extent, size)
    return warped.image
def raster_to_netcdf(filename_or_raster, outfilename=None, variable_name='data', format='NETCDF4', **kwargs):
    """
    Write the single band of a raster to a new NetCDF file.

    Parameters
    ----------
    filename_or_raster: name of file to open with rasterio, or opened rasterio raster dataset
    outfilename: name of output file.  If blank, will be same name as input with *.nc extension added
    variable_name: name of the data variable created in the output file
    format: output format for netCDF file: NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, NETCDF4
    kwargs: arguments passed to variable creation: zlib

    Raises
    ------
    ValueError: if a filename is given and the file does not exist
    NotImplementedError: if the raster does not have exactly one band

    Note: only rasters with descending y coordinates are currently supported
    """

    start = time.time()

    if isinstance(filename_or_raster, string_types):
        if not os.path.exists(filename_or_raster):
            raise ValueError('File does not exist: {0}'.format(filename_or_raster))

        src = rasterio.open(filename_or_raster)
        managed_raster = True
    else:
        src = filename_or_raster
        managed_raster = False

    try:
        if not src.count == 1:
            raise NotImplementedError('ERROR: multi-band rasters not yet supported for this operation')

        prj = pyproj.Proj(**src.crs)

        outfilename = outfilename or src.name + '.nc'
        with Dataset(outfilename, 'w', format=format) as target:
            # Geographic rasters get longitude/latitude coordinate names, everything else x/y.
            if prj.is_latlong():
                x_varname = 'longitude'
                y_varname = 'latitude'
            else:
                x_varname = 'x'
                y_varname = 'y'

            # TODO: may need to do this in blocks if source is big
            data = src.read(1, masked=True)

            coords = SpatialCoordinateVariables.from_bbox(BBox(src.bounds, prj), src.width, src.height)
            coords.add_to_dataset(target, x_varname, y_varname, **kwargs)

            out_var = target.createVariable(variable_name, data.dtype, dimensions=(y_varname, x_varname), **kwargs)
            out_var[:] = data
            set_crs(target, variable_name, prj, set_proj4_att=False)
    finally:
        # Only close rasters opened here, and do so even when the conversion fails;
        # a raster passed in by the caller stays under the caller's control.
        if managed_raster:
            src.close()

    print('Elapsed {0:.3f} seconds'.format(time.time() - start))
def get_mask(self, **kwargs):
    """Rasterize GeoJSON features onto the grid of ``self.data``.

    Parameters
    ----------
    geoJSON : dict
        Required keyword argument. Read as a mapping with a ``'features'``
        list whose items each provide a ``'geometry'``.

    Returns
    -------
    The ``uint8`` array produced by ``rasterize`` with the shape of
    ``self.data``, using ``fill=1``, ``default_value=0`` and
    ``all_touched=True``.

    Raises
    ------
    ValueError
        If the ``geoJSON`` keyword argument is missing.
    """
    try:
        geoJSON = kwargs['geoJSON']
    except KeyError:
        # A missing dict key raises KeyError (not ValueError); translate it to the
        # error reported for missing constraint arguments.
        raise ValueError('Missing constraint arguments')

    features = geoJSON['features']
    geometries = [f['geometry'] for f in features]

    # from_bbox receives the shape reversed relative to ``self.data.shape``.
    coords = SpatialCoordinateVariables.from_bbox(self.data.extent, *reversed(self.data.shape))

    return rasterize(
        geometries,
        out_shape=self.data.shape,
        fill=1,
        transform=coords.affine,
        all_touched=True,
        default_value=0,
        dtype=numpy.uint8
    )
def warp_to_grid(self, path):
    """Read band 1 of the raster at `path` through a VRT warped onto ``self.data``'s grid.

    The target CRS and transform come from ``self.data.extent`` and
    ``self.data.shape``; resampling is nearest-neighbour. Returns the masked
    array read from the warped VRT.
    """
    with rasterio.open(path) as dataset:
        bbox = self.data.extent

        target_crs = CRS.from_string(bbox.projection.srs)
        target_grid = SpatialCoordinateVariables.from_bbox(bbox, self.data.shape[1], self.data.shape[0])
        target_height = self.data.shape[self.data.y_dim]
        target_width = self.data.shape[self.data.x_dim]

        vrt_options = dict(
            resampling=Resampling.nearest,
            dst_crs=target_crs,
            dst_transform=target_grid.affine,
            dst_height=target_height,
            dst_width=target_width,
        )

        with WarpedVRT(dataset, **vrt_options) as vrt:
            return vrt.read(1, masked=True)
def get_mask(self, **kwargs):
    """Build a boolean mask that is True outside a range of x coordinates.

    Requires the keyword arguments ``min`` and ``max`` (given in either
    order). The range is shrunk by half a pixel on each side before being
    converted to column indices; columns before the first index and after the
    last one are set to True.

    Raises ValueError when ``min`` or ``max`` is missing.
    """
    try:
        requested = (kwargs['min'], kwargs['max'])
    except KeyError:
        raise ValueError('Missing constraint arguments')

    min_lon, max_lon = sorted(requested)

    coords = SpatialCoordinateVariables.from_bbox(self.data.extent, *reversed(self.data.shape))
    x_coords = coords.x
    half_pixel_size = float(x_coords.pixel_size) / 2

    first_col, last_col = x_coords.indices_for_range(
        min_lon + half_pixel_size,
        max_lon - half_pixel_size
    )

    mask = numpy.zeros_like(self.data, 'bool')
    mask[:, :first_col] = True
    mask[:, last_col + 1:] = True

    return mask
def load_variable_data(self, variable, region, year, model=None):
    """Load the grid for `variable` in `region` as a raster.

    ``'LAT'`` is special-cased: latitude data is created from the coordinates
    of ``<region>_dem.nc`` under ``NC_SERVICE_DIR/regions/<region>``. Any other
    variable is looked up as the service ``'<region>_<year>Y_<variable>'``,
    where `year` is prefixed with ``'<model>_'`` when `model` is given, and its
    first variable's grid is wrapped in a ``Raster``.
    """
    if variable == 'LAT':
        dem_path = os.path.join(NC_SERVICE_DIR, 'regions', region, '{}_dem.nc'.format(region))
        with Dataset(dem_path) as ds:
            dem_coords = SpatialCoordinateVariables.from_dataset(ds)
            return create_latitude_data(dem_coords)

    if model is not None:
        year = '{model}_{year}'.format(model=model, year=year)

    service_name = '{region}_{year}Y_{variable}'.format(region=region, year=year, variable=variable)
    service = Service.objects.get(name=service_name)
    record = service.variable_set.first()

    # Point the instance at this service and drop any previously held dataset
    # before asking for the grid.
    self.service = record.service
    self.dataset = None
    grid = self.get_grid_for_variable(record)

    return Raster(grid, record.full_extent, 1, 0, Y_INCREASING)
def get_mask(self, **kwargs):
    """Build a mask that is True where elevation lies outside ``[min, max]``.

    Requires the keyword arguments ``min`` and ``max``. Elevation is read from
    the ``'elevation'`` variable of the ``'<region>_dem'`` service dataset,
    restricted to the window covering ``self.data.extent``.

    Raises ValueError when ``min`` or ``max`` is missing.
    """
    try:
        min_elevation = kwargs['min']
        max_elevation = kwargs['max']
    except KeyError:
        raise ValueError('Missing constraint arguments')

    service = Service.objects.get(name='{}_dem'.format(self.region))
    dem_path = os.path.join(settings.NC_SERVICE_DATA_ROOT, service.data_path)

    with Dataset(dem_path) as ds:
        record = service.variable_set.first()
        x_size = ds.variables[record.x_dimension].size
        y_size = ds.variables[record.y_dimension].size
        coords = SpatialCoordinateVariables.from_bbox(record.full_extent, x_size, y_size, dtype='float64')

        window = coords.get_window_for_bbox(self.data.extent)
        elevation = ds.variables['elevation'][window.y_slice, window.x_slice]

    # Too low first, then OR in too high (in place, on the first comparison's result).
    mask = elevation < min_elevation
    mask |= elevation > max_elevation

    return mask
def get_mask(self, hours, lat, lon, year, month, day):
    """Build a mask that is True where daylight differs from a reference point by more than `hours`.

    The reference value is ``self.daylight`` at (`lat`, `lon`) on the given
    date. Per-cell values come from ``self.daylight_array`` evaluated on the
    ``'lat'`` / ``'lon'`` variables of the ``'<region>_dem'`` service dataset,
    restricted to the window covering ``self.data.extent``.
    """
    date = datetime.date(year, month, day)
    reference = self.daylight(date, lat, lon)

    service = Service.objects.get(name='{}_dem'.format(self.region))
    dem_path = os.path.join(settings.NC_SERVICE_DATA_ROOT, service.data_path)

    with Dataset(dem_path) as ds:
        lat_arr = ds['lat'][:]
        lon_arr = ds['lon'][:]

        coords = SpatialCoordinateVariables.from_bbox(
            service.full_extent,
            ds.variables['lon'].size,
            ds.variables['lat'].size,
            dtype='float64'
        )
        window = coords.get_window_for_bbox(self.data.extent)

    window_lats = lat_arr[window.y_slice]
    window_lons = lon_arr[window.x_slice]
    daylight_arr = self.daylight_array(date, window_lats, window_lons)

    # Too short first, then OR in too long (in place).
    mask = daylight_arr < (reference - hours)
    mask |= daylight_arr > (reference + hours)

    return mask
def get_mask(self, lat, lon, distance):
    """Rasterize a buffer around a point onto the grid of ``self.data``.

    A transverse Mercator projection (units of metres) is centred on the
    requested `lat` / `lon`. The point is then snapped using the grid's pixel
    sizes, buffered by ``distance * 1000`` in that projection (so `distance`
    is presumably in kilometres — confirm with callers), reprojected to the
    data projection and rasterized with ``fill=1`` and ``default_value=0``.
    """
    wgs84 = pyproj.Proj('+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs')
    # Built from the point as requested, before any snapping.
    local_projection = pyproj.Proj({
        'proj': 'tmerc',
        'lat_0': lat,
        'lon_0': lon,
        'k': 1,
        'x_0': 0,
        'y_0': 0,
        'ellps': 'WGS84',
        'towgs84': '0,0,0,0,0,0,0',
        'units': 'm'
    })

    extent = self.data.extent
    coords = SpatialCoordinateVariables.from_bbox(extent, *reversed(self.data.shape))
    x_pixel_size = coords.x.pixel_size
    y_pixel_size = coords.y.pixel_size

    # Snap point to nearest cell center
    signed_y_pixel_size = y_pixel_size if coords.y.is_ascending_order() else -1 * y_pixel_size
    y_index = int((lat - y_pixel_size * 1.5 - extent.ymin) / y_pixel_size)
    x_index = int((lon - x_pixel_size * 1.5 - extent.xmin) / x_pixel_size)
    snapped_lat = sorted(coords.y.values)[y_index] - signed_y_pixel_size/2
    snapped_lon = sorted(coords.x.values)[x_index] - x_pixel_size/2

    to_local = partial(pyproj.transform, wgs84, local_projection)
    to_data = partial(pyproj.transform, local_projection, extent.projection)

    local_point = transform(to_local, Point(snapped_lon, snapped_lat))
    local_buffer = local_point.buffer(distance * 1000, resolution=64)
    shape = transform(to_data, local_buffer)

    return rasterize(
        [shape],
        out_shape=self.data.shape,
        fill=1,
        transform=coords.affine,
        all_touched=True,
        default_value=0,
        dtype=numpy.uint8
    )
def render_netcdf(filename_pattern, variable, output_directory, renderer_file, save_file, renderer_type, colormap,
                  fill, colorspace, palette, palette_stretch, scale, id_variable, lh, legend_breaks, legend_ticks,
                  legend_precision, format, src_crs, dst_crs, res, resampling, anchors, interactive_map, mask_path):
    """
    Render netcdf files to images.

    colormap is ignored if renderer_file is provided

    --dst-crs is ignored if using --map option (always uses EPSG:3857)

    If no colormap or palette is provided, a default palette may be chosen based on the name of the variable.

    If provided, mask must be 1 for areas to be masked out, and 0 otherwise.  It must be in the same CRS as the input datasets, and have the same spatial dimensions.

    """

    # Overview of the steps below:
    #   1. resolve input files, output directory and optional mask
    #   2. build the renderer (from a renderer file, or from palette / colormap options)
    #   3. optionally save the renderer definition, then write the legend image
    #   4. inspect the first dataset for dimensions, CRS and the target transform
    #   5. render one image per file (2-D) or per file and first-axis index (3-D)
    #   6. optionally write and open a Leaflet index.html

    # Parameter overrides
    if interactive_map:
        dst_crs = 'EPSG:3857'

    filenames = glob.glob(filename_pattern)
    if not filenames:
        raise click.BadParameter('No files found matching that pattern', param='filename_pattern',
                                 param_hint='FILENAME_PATTERN')

    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    mask = get_mask(mask_path) if mask_path is not None else None

    if renderer_file is not None and not save_file:
        if not os.path.exists(renderer_file):
            raise click.BadParameter('does not exist', param='renderer_file', param_hint='renderer_file')

        # see https://bitbucket.org/databasin/ncdjango/wiki/Home for format
        # Read through a context manager so the file handle is closed promptly.
        with open(renderer_file) as renderer_fp:
            renderer_dict = json.loads(renderer_fp.read())

        # The file may hold either one renderer or a mapping of variable name -> renderer.
        if variable in renderer_dict and not 'colors' in renderer_dict:
            renderer_dict = renderer_dict[variable]

        renderer_type = renderer_dict['type']
        if renderer_type == 'stretched':
            colors = ','.join([str(c[0]) for c in renderer_dict['colors']])
            if 'min' in colors or 'max' in colors or 'mean' in colors:
                # Replace symbolic stops ('min', 'max', 'mean' or '<factor>*<statistic>')
                # with numbers computed from the input files.
                statistics = collect_statistics(filenames, (variable, ), mask=mask)[variable]
                for entry in renderer_dict['colors']:
                    # NOTE(review): `basestring` only exists on Python 2 unless this module
                    # defines a compatibility alias elsewhere - confirm.
                    if isinstance(entry[0], basestring):
                        if entry[0] in ('min', 'max', 'mean'):
                            entry[0] = statistics[entry[0]]
                        elif '*' in entry[0]:
                            rel_value, statistic = entry[0].split('*')
                            entry[0] = float(rel_value) * statistics[statistic]

        renderer = renderer_from_dict(renderer_dict)

    else:
        if renderer_type == 'stretched':
            if palette is not None:
                renderer = palette_to_stretched_renderer(palette, palette_stretch, filenames, variable,
                                                         fill_value=fill, mask=mask)
            elif colormap is None and variable in DEFAULT_PALETTES:
                palette, palette_stretch = DEFAULT_PALETTES[variable]
                renderer = palette_to_stretched_renderer(palette, palette_stretch, filenames, variable,
                                                         fill_value=fill, mask=mask)
            else:
                if colormap is None:
                    colormap = 'min:#000000,max:#FFFFFF'
                renderer = colormap_to_stretched_renderer(colormap, colorspace, filenames, variable,
                                                          fill_value=fill, mask=mask)

        elif renderer_type == 'classified':
            if not palette:
                raise click.BadParameter('palette required for classified (for now)', param='--palette',
                                         param_hint='--palette')

            renderer = palette_to_classified_renderer(palette, filenames, variable, method='equal',
                                                      fill_value=fill, mask=mask)  # TODO: other methods

    if save_file:
        if os.path.exists(save_file):
            # Merge this variable's renderer into the existing file, rewriting it in place.
            with open(save_file, 'r+') as output_file:
                data = json.loads(output_file.read())
                output_file.seek(0)
                output_file.truncate()
                data[variable] = renderer.serialize()
                output_file.write(json.dumps(data, indent=4))
        else:
            with open(save_file, 'w') as output_file:
                output_file.write(json.dumps({variable: renderer.serialize()}))

    if renderer_type == 'stretched':
        if legend_ticks is not None and not legend_breaks:
            legend_ticks = [float(v) for v in legend_ticks.split(',')]

        legend = renderer.get_legend(image_height=lh, breaks=legend_breaks, ticks=legend_ticks,
                                     max_precision=legend_precision)[0].to_image()

    elif renderer_type == 'classified':
        legend = composite_elements(renderer.get_legend())

    legend.save(os.path.join(output_directory, '{0}_legend.png'.format(variable)))

    # The first file serves as the template for dimensions, CRS and transform.
    with Dataset(filenames[0]) as ds:
        var_obj = ds.variables[variable]
        dimensions = var_obj.dimensions
        shape = var_obj.shape
        num_dimensions = len(shape)

        if num_dimensions == 3:
            if id_variable:
                if shape[0] != ds.variables[id_variable][:].shape[0]:
                    raise click.BadParameter('must be same dimensionality as 3rd dimension of {0}'.format(variable),
                                             param='--id_variable', param_hint='--id_variable')
            else:
                # Guess from the 3rd dimension
                guess = dimensions[0]
                if guess in ds.variables and ds.variables[guess][:].shape[0] == shape[0]:
                    id_variable = guess

        ds_crs = get_crs(ds, variable)
        if not ds_crs and is_geographic(ds, variable):
            ds_crs = 'EPSG:4326'  # Assume all geographic data is WGS84

        # A CRS found in the dataset wins over the src_crs option.
        src_crs = CRS.from_string(ds_crs) if ds_crs else CRS({'init': src_crs}) if src_crs else None

        # get transforms, assume last 2 dimensions on variable are spatial in row, col order
        y_dim, x_dim = dimensions[-2:]
        coords = SpatialCoordinateVariables.from_dataset(
            ds, x_dim, y_dim, projection=Proj(src_crs.to_dict()) if src_crs else None
        )

        if mask is not None and not mask.shape == shape[-2:]:
            # Will likely break before this if collecting statistics
            raise click.BadParameter('mask variable shape does not match shape of input spatial dimensions',
                                     param='--mask', param_hint='--mask')

        flip_y = False
        reproject_kwargs = None
        if dst_crs is not None:
            if not src_crs:
                raise click.BadParameter('must provide src_crs to reproject', param='--src-crs',
                                         param_hint='--src-crs')

            dst_crs = CRS.from_string(dst_crs)

            src_height, src_width = coords.shape
            dst_transform, dst_width, dst_height = calculate_default_transform(
                src_crs, dst_crs, src_width, src_height, *coords.bbox.as_list(), resolution=res
            )

            reproject_kwargs = {
                'src_crs': src_crs,
                'src_transform': coords.affine,
                'dst_crs': dst_crs,
                'dst_transform': dst_transform,
                'resampling': getattr(Resampling, resampling),
                'dst_shape': (dst_height, dst_width)
            }

        else:
            dst_transform = coords.affine
            dst_height, dst_width = coords.shape
            dst_crs = src_crs

            if coords.y.is_ascending_order():
                # Only needed if we are not already reprojecting the data, since that will flip it automatically
                flip_y = True

        if anchors or interactive_map:
            if not (dst_crs or src_crs):
                raise click.BadParameter('must provide at least src_crs to get Leaflet anchors or interactive map',
                                         param='--src-crs', param_hint='--src-crs')

            leaflet_anchors = get_leaflet_anchors(
                BBox.from_affine(dst_transform, dst_width, dst_height,
                                 projection=Proj(dst_crs) if dst_crs else None))

            if anchors:
                click.echo('Anchors: {0}'.format(leaflet_anchors))

    # Maps image name (without extension) -> image file name, for the interactive map.
    layers = {}
    for filename in filenames:
        with Dataset(filename) as ds:
            click.echo('Processing {0}'.format(filename))

            filename_root = os.path.split(filename)[1].replace('.nc', '')

            if not variable in ds.variables:
                raise click.BadParameter('variable {0} was not found in file: {1}'.format(variable, filename),
                                         param='variable', param_hint='VARIABLE')

            var_obj = ds.variables[variable]
            if not var_obj.dimensions == dimensions:
                raise click.ClickException('All datasets must have the same dimensions for {0}'.format(variable))

            if num_dimensions == 2:
                data = var_obj[:]
                if mask is not None:
                    data = numpy.ma.masked_array(data, mask=mask)
                image_filename = os.path.join(output_directory,
                                              '{0}_{1}.{2}'.format(filename_root, variable, format))
                if reproject_kwargs:
                    data = warp_array(data, **reproject_kwargs)
                render_image(renderer, data, image_filename, scale, flip_y=flip_y, format=format)

                local_filename = os.path.split(image_filename)[1]
                layers[os.path.splitext(local_filename)[0]] = local_filename

            elif num_dimensions == 3:
                for index in range(shape[0]):
                    # Label each slice with the id variable's value when available, else its index.
                    item_id = ds.variables[id_variable][index] if id_variable is not None else index
                    image_filename = os.path.join(
                        output_directory, '{0}_{1}__{2}.{3}'.format(filename_root, variable, item_id, format))
                    data = var_obj[index]
                    if mask is not None:
                        data = numpy.ma.masked_array(data, mask=mask)
                    if reproject_kwargs:
                        data = warp_array(data, **reproject_kwargs)
                    render_image(renderer, data, image_filename, scale, flip_y=flip_y, format=format)

                    local_filename = os.path.split(image_filename)[1]
                    layers[os.path.splitext(local_filename)[0]] = local_filename

            # TODO: not tested recently.  Make sure still correct
            # else:
            #     # Assume last 2 components of shape are lat & lon, rest are iterated over
            #     id_variables = None
            #     if id_variable is not None:
            #         id_variables = id_variable.split(',')
            #         for index, name in enumerate(id_variables):
            #             if name:
            #                 assert data.shape[index] == ds.variables[name][:].shape[0]
            #
            #     ranges = []
            #     for dim in data.shape[:-2]:
            #         ranges.append(range(0, dim))
            #     for combined_index in product(*ranges):
            #         id_parts = []
            #         for index, dim_index in enumerate(combined_index):
            #             if id_variables is not None and index < len(id_variables) and id_variables[index]:
            #                 id = ds.variables[id_variables[index]][dim_index]
            #
            #                 if not isinstance(id, basestring):
            #                     if isinstance(id, Iterable):
            #                         id = '_'.join((str(i) for i in id))
            #                     else:
            #                         id = str(id)
            #
            #                 id_parts.append(id)
            #
            #             else:
            #                 id_parts.append(str(dim_index))
            #
            #         combined_id = '_'.join(id_parts)
            #         image_filename = os.path.join(output_directory, '{0}__{1}.{2}'.format(filename_root, combined_id, format))
            #         if reproject_kwargs:
            #             data = warp_array(data, **reproject_kwargs)  # NOTE: lack of index will break this
            #         render_image(renderer, data[combined_index], image_filename, scale, flip_y=flip_y, format=format)
            #
            #         local_filename = os.path.split(image_filename)[1]
            #         layers[os.path.splitext(local_filename)[0]] = local_filename

    if interactive_map:
        index_html = os.path.join(output_directory, 'index.html')
        with open(index_html, 'w') as out:
            template = Environment(loader=PackageLoader('trefoil.cli')).get_template('map.html')
            out.write(
                template.render(
                    layers=json.dumps(layers),
                    bounds=str(leaflet_anchors),
                    variable=variable
                )
            )

        webbrowser.open(index_html)
def main(in_pattern, out_pattern, boundary, single, varname):
    """
    Clips and masks large NetCDF datasets to regional datasets based on the boundary. The in_pattern and out_pattern
    arguments should be filename patterns (can include path) with the pattern: /path/to/in_netcdf_{variable}.nc.
    Example usage: python cut_to_region.py NorthAmerica/NA_{variable}.nc USWest/west_{variable}.nc west.shp
    """

    if single and not varname:
        print('--varname is required when --single is used')
        sys.exit(-1)

    # In single mode the patterns are literal paths; otherwise expand them once per known variable.
    if single:
        input_paths = [(in_pattern, varname)]
    else:
        input_paths = [(in_pattern.format(variable=name), name) for name in VARIABLES]

    for candidate, _ in input_paths:
        if not os.path.exists(candidate):
            print('Input file {} does not exist.'.format(candidate))
            sys.exit(-1)

    with fiona.open(boundary, 'r') as shp:
        wgs84 = Proj('+init=EPSG:4326')
        shp_projection = Proj(shp.crs)
        xmin, ymin, xmax, ymax = shp.bounds[0], shp.bounds[1], shp.bounds[2], shp.bounds[3]

        # Project the two corners of the boundary's bounds to WGS84 to get the clip box.
        lower_left = transform(shp_projection, wgs84, xmin, ymin)
        upper_right = transform(shp_projection, wgs84, xmax, ymax)
        bbox = BBox([*lower_left, *upper_right], projection=wgs84)

        features = [
            transform_geom(shp.crs, {'init': 'EPSG: 4326'}, item[1]['geometry'])
            for item in shp.items()
        ]

    for in_path, variable in input_paths:
        out_path = out_pattern if single else out_pattern.format(variable=variable)

        if os.path.exists(out_path):
            prompt = "The output file '{}' already exists? Do you with to replace it? [y/n] ".format(out_path)
            answer = input(prompt)
            if answer.lower().strip() not in ['y', 'yes']:
                print('Exiting...')
                sys.exit()

        with Dataset(in_path, 'r') as ds:
            coords = SpatialCoordinateVariables.from_dataset(ds, x_name='longitude', y_name='latitude')

            x_start, x_stop = coords.x.indices_for_range(bbox.xmin, bbox.xmax)
            y_start, y_stop = coords.y.indices_for_range(bbox.ymin, bbox.ymax)

            clipped_coords = coords.slice_by_bbox(bbox)

            grid = ds.variables[variable][slice(y_start, y_stop), slice(x_start, x_stop)]

        if is_masked(grid):
            mask = grid.mask.astype('uint8')
        else:
            mask = numpy.zeros(grid.shape, dtype='uint8')

        # OR the rasterized boundary into the mask (fill=1, shapes burned as 0).
        mask |= rasterize(
            ((geometry, 0) for geometry in features),
            out_shape=mask.shape,
            transform=clipped_coords.affine,
            fill=1,
            default_value=0
        )
        grid = numpy.ma.masked_where(mask == 1, grid.data)

        print('Writing {}...'.format(out_path))
        with Dataset(out_path, 'w', format='NETCDF4') as out_ds:
            clipped_coords.add_to_dataset(out_ds, 'longitude', 'latitude')
            data_var = out_ds.createVariable(
                variable, grid.dtype, dimensions=('latitude', 'longitude'), fill_value=grid.fill_value
            )

            # Trim the grid if it came out larger than the clipped coordinate grid.
            if data_var.shape != grid.shape:
                grid = grid[:data_var.shape[0], :data_var.shape[1]]

            data_var[:] = grid
            set_crs(out_ds, variable, Proj('+init=EPSG:4326'))
def map_eems(
        eems_file,
        # output_directory,
        scale,
        format,
        src_crs,
        resampling):
    """
    Render a NetCDF EEMS model to a web map.
    """

    # Steps: collect the (file, variable) pairs produced or read by the model, derive the
    # reprojection to EPSG:3857 from the first file, render every variable to an image in a
    # temporary directory, then write and open an index.html for the map.

    from EEMSBasePackage import EEMSCmd, EEMSProgram

    model = EEMSProgram(eems_file)

    # For each data producing command, store the netcdf file that contains it
    file_vars = dict()
    raw_variables = set()
    for cmd in model.orderedCmds:  # This is bottom up, may want to invert
        filename = None
        variable = None
        if cmd.HasResultName():
            filename = cmd.GetParam('OutFileName')
            variable = cmd.GetResultName()
        elif cmd.IsReadCmd():
            filename = cmd.GetParam('OutFileName')
            variable = cmd.GetParam('NewFieldName')
            raw_variables.add(variable)

        if filename and variable:
            if not filename in file_vars:
                file_vars[filename] = []
            file_vars[filename].append(variable)

    # Materialize the keys: a dict view cannot be indexed on Python 3, and the
    # first file is used as the template below.
    filenames = list(file_vars.keys())
    if not filenames:
        raise click.ClickException('No data files found in EEMS model: {0}'.format(eems_file))

    for filename in filenames:
        if not os.path.exists(filename):
            raise click.ClickException('Could not find data file from EEMS model: {0}'.format(filename))

    dst_crs = 'EPSG:3857'

    output_directory = tempfile.mkdtemp()
    click.echo('Using temp directory: {0}'.format(output_directory))
    # if not os.path.exists(output_directory):
    #     os.makedirs(output_directory)

    # Since fuzzy renderer is hardcoded, we can output it now
    fuzzy_renderer = palette_to_stretched_renderer(DEFAULT_PALETTES['fuzzy'], '1,-1')
    fuzzy_renderer.get_legend(image_height=150)[0].to_image().save(os.path.join(output_directory, 'fuzzy_legend.png'))

    template_filename = filenames[0]
    template_var = file_vars[template_filename][0]
    with Dataset(template_filename) as ds:
        var_obj = ds.variables[template_var]
        dimensions = var_obj.dimensions
        shape = var_obj.shape
        num_dimensions = len(shape)
        if num_dimensions != 2:
            raise click.ClickException('Only 2 dimensions are allowed on data variables for now')

        ds_crs = get_crs(ds, template_var)
        if not ds_crs and is_geographic(ds, template_var):
            ds_crs = 'EPSG:4326'  # Assume all geographic data is WGS84

        # A CRS found in the dataset wins over the src_crs argument.
        src_crs = CRS.from_string(ds_crs) if ds_crs else CRS({'init': src_crs}) if src_crs else None

        # get transforms, assume last 2 dimensions on variable are spatial in row, col order
        y_dim, x_dim = dimensions[-2:]
        # NOTE(review): the CRS object is passed to Proj directly here - confirm Proj accepts it.
        coords = SpatialCoordinateVariables.from_dataset(
            ds, x_dim, y_dim, projection=Proj(src_crs) if src_crs else None
        )
        #
        # if mask is not None and not mask.shape == shape[-2:]:
        #     # Will likely break before this if collecting statistics
        #     raise click.BadParameter(
        #         'mask variable shape does not match shape of input spatial dimensions',
        #         param='--mask', param_hint='--mask'
        #     )
        #

        if not src_crs:
            raise click.BadParameter('must provide src_crs to reproject', param='--src-crs', param_hint='--src-crs')

        dst_crs = CRS.from_string(dst_crs)

        src_height, src_width = coords.shape
        dst_transform, dst_width, dst_height = calculate_default_transform(
            src_crs, dst_crs, src_width, src_height, *coords.bbox.as_list()
        )

        reproject_kwargs = {
            'src_crs': src_crs,
            'src_transform': coords.affine,
            'dst_crs': dst_crs,
            'dst_transform': dst_transform,
            'resampling': getattr(Resampling, resampling),
            'dst_shape': (dst_height, dst_width)
        }

        if not (dst_crs or src_crs):
            raise click.BadParameter('must provide valid src_crs to get interactive map',
                                     param='--src-crs', param_hint='--src-crs')

        leaflet_anchors = get_leaflet_anchors(BBox.from_affine(dst_transform, dst_width, dst_height,
                                                               projection=Proj(dst_crs) if dst_crs else None))

    # Maps variable name -> rendered image file name.
    layers = {}
    for filename in filenames:
        with Dataset(filename) as ds:
            click.echo('Processing dataset {0}'.format(filename))

            for variable in file_vars[filename]:
                click.echo('Processing variable {0}'.format(variable))

                if not variable in ds.variables:
                    raise click.ClickException('variable {0} was not found in file: {1}'.format(variable, filename))

                var_obj = ds.variables[variable]
                if not var_obj.dimensions == dimensions:
                    raise click.ClickException('All datasets must have the same dimensions for {0}'.format(variable))

                data = var_obj[:]
                # if mask is not None:
                #     data = numpy.ma.masked_array(data, mask=mask)

                if variable in raw_variables:
                    # Variables introduced by read commands get a renderer stretched over their
                    # own data range, with their own legend image.
                    palette = DEFAULT_PALETTES['raw']
                    palette_stretch = '{0},{1}'.format(data.max(), data.min())

                    renderer = palette_to_stretched_renderer(palette, palette_stretch)
                    renderer.get_legend(image_height=150, max_precision=2)[0].to_image().save(
                        os.path.join(output_directory, '{0}_legend.png'.format(variable)))
                else:
                    renderer = fuzzy_renderer

                image_filename = os.path.join(output_directory, '{0}.{1}'.format(variable, format))
                data = warp_array(data, **reproject_kwargs)
                render_image(renderer, data, image_filename, scale=scale, format=format)

                local_filename = os.path.split(image_filename)[1]
                layers[variable] = local_filename

    index_html = os.path.join(output_directory, 'index.html')
    with open(index_html, 'w') as out:
        template = Environment(loader=PackageLoader('trefoil.cli')).get_template('eems_map.html')
        out.write(
            template.render(
                layers=json.dumps(layers),
                bounds=str(leaflet_anchors),
                tree=[[cmd, depth] for (cmd, depth) in model.GetCmdTree()],
                raw_variables=list(raw_variables)
            )
        )

    webbrowser.open(index_html)
def test_SpatialCoordinateVariables_bbox():
    """Coordinates built from a bounding box report that same box back."""
    source_bbox = BBox((10.5, 5, 110.5, 55), projection=Proj(init='EPSG:4326'))

    coords = SpatialCoordinateVariables.from_bbox(source_bbox, 10, 5)
    round_tripped = coords.bbox.as_list()

    assert round_tripped == source_bbox.as_list()
def test_window_for_bbox():
    """A box slightly inside a 20x20 grid maps to the window slice(1, 19) on both axes."""
    grid_bbox = BBox([-124, 82, -122, 90], Proj(init='epsg:4326'))
    coords = SpatialCoordinateVariables.from_bbox(grid_bbox, 20, 20)

    inner_bbox = BBox([-123.9, 82.4, -122.1, 89.6])
    window = coords.get_window_for_bbox(inner_bbox)

    expected = slice(1, 19)
    assert window.x_slice == expected
    assert window.y_slice == expected
def describe(path_or_dataset):
    """Summarize the dimensions, variables and attributes of a NetCDF dataset.

    Parameters
    ----------
    path_or_dataset: path of a file to open with ``Dataset``, or an already opened dataset.
        A dataset opened here from a path is closed before returning; a dataset passed in
        by the caller is left open.

    Returns
    -------
    dict with the keys:
        'attributes': result of ``get_ncattrs`` for the dataset
        'dimensions': ``{name: {'length': ...}}`` for every dimension
        'variables': per-variable info for every variable that has dimensions: attributes,
            dimensions, data_type, name, min / max (except for 'str' data), and where
            applicable 'interval', 'spatial_grid', 'time' and 'proj4'
    """

    opened_here = isinstance(path_or_dataset, string_types)
    dataset = Dataset(path_or_dataset) if opened_here else path_or_dataset

    try:
        description = {
            'dimensions': {},
            'variables': {},
            'attributes': get_ncattrs(dataset)
        }

        for dimension_name in dataset.dimensions:
            dimension = dataset.dimensions[dimension_name]
            description['dimensions'][dimension_name] = {'length': len(dimension)}

        for variable_name in dataset.variables:
            variable = dataset.variables[variable_name]

            if not variable.dimensions:
                # Do not collect info about dimensionless variables (e.g., CRS variable)
                continue

            # Reduce reprs such as "<class 'str'>" to the quoted type name.
            dtype = str(variable.dtype)
            if "'" in dtype:
                dtype = dtype.split("'")[1]

            attributes = get_ncattrs(variable)
            variable_info = {
                'attributes': attributes,
                'dimensions': variable.dimensions,
                'data_type': dtype,
                'name': attributes.get('long_name') or attributes.get('standard_name') or variable_name
            }

            if dtype not in ('str', ):
                if len(variable.shape) > 2:
                    # Avoid loading the entire array into memory by iterating along the first index (usually time)
                    variable_info.update({
                        'min': min(variable[i, :].min().item() for i in range(variable.shape[0])),
                        'max': max(variable[i, :].max().item() for i in range(variable.shape[0]))
                    })
                else:
                    data = variable[:]
                    variable_info.update({
                        'min': data.min().item(),
                        'max': data.max().item()
                    })

            if variable_name in dataset.dimensions and dtype not in ('str', ):
                # Coordinate variable (shares its name with a dimension)
                dimension_variable = dataset.variables[variable_name]
                if len(dimension_variable.dimensions) == 1:  # range dimensions don't make sense for interval
                    interval = get_interval(dimension_variable)
                    if interval:
                        variable_info['interval'] = interval

            else:
                # Data variable
                proj4 = get_crs(dataset, variable_name)

                # extent
                if len(variable.dimensions) >= 2:
                    x_variable_name = None
                    y_variable_name = None
                    time_variable_name = None

                    # Classify each dimension that has a coordinate variable as x, y or time,
                    # by standard_name attribute or by common dimension name.
                    for dimension_name in (x for x in variable.dimensions if x in dataset.variables):
                        attributes = get_ncattrs(dataset.variables[dimension_name])
                        standard_name = attributes.get('standard_name', None)
                        if standard_name in X_DIMENSION_STANDARD_NAMES or dimension_name in X_DIMENSION_COMMON_NAMES:
                            x_variable_name = dimension_name
                        elif standard_name in Y_DIMENSION_STANDARD_NAMES or dimension_name in Y_DIMENSION_COMMON_NAMES:
                            y_variable_name = dimension_name
                        elif standard_name in TIME_DIMENSION_STANDARD_NAMES or dimension_name in TIME_DIMENSION_COMMON_NAMES:
                            # A time dimension of length 1 is not reported.
                            if len(dataset.dimensions[dimension_name]) > 1:
                                time_variable_name = dimension_name

                    if x_variable_name and y_variable_name:
                        if proj4 is None and is_geographic(dataset, variable_name):
                            # Assume WGS84
                            proj4 = PROJ4_GEOGRAPHIC

                        coordinates = SpatialCoordinateVariables(
                            SpatialCoordinateVariable(dataset.variables[x_variable_name]),
                            SpatialCoordinateVariable(dataset.variables[y_variable_name]),
                            Proj(str(proj4)) if proj4 else None
                        )

                        variable_info['spatial_grid'] = {
                            'extent': coordinates.bbox.as_dict(),
                            'x_dimension': x_variable_name,
                            'x_resolution': coordinates.x.pixel_size,
                            'y_dimension': y_variable_name,
                            'y_resolution': coordinates.y.pixel_size
                        }

                    if time_variable_name:
                        time_variable = dataset.variables[time_variable_name]

                        time_info = {
                            'dimension': time_variable_name,
                        }

                        try:
                            date_variable = DateVariable(time_variable)
                            values = date_variable.datetimes
                            time_info['extent'] = [values.min().isoformat(), values.max().isoformat()]
                            time_info['interval_unit'] = date_variable.unit

                            interval = get_interval(time_variable)
                            if interval is not None:
                                time_info['interval'] = interval

                        except ValueError:
                            # Best effort: on ValueError only the dimension name (plus anything
                            # set before the error) is reported for the time axis.
                            pass

                        variable_info['time'] = time_info

                if proj4:
                    variable_info['proj4'] = proj4

            description['variables'][variable_name] = variable_info

        return description
    finally:
        # Do not leak the file handle of a dataset opened here.
        if opened_here:
            dataset.close()
def to_netcdf(
        files,
        output,
        variable,
        dtype,
        src_crs,
        x_name,
        y_name,
        z_name,
        datetime_pattern,
        netcdf3,
        compress,
        packed,
        xy_dtype,
        # z_dtype,
        calendar,
        autocrop):
    """
    Convert rasters to NetCDF and stack them according to a dimension.

    X and Y dimension names will be named according to the source projection (lon, lat if geographic projection, x, y
    otherwise) unless specified.

    Will overwrite an existing NetCDF file.

    Only the first band of the input will be turned into a NetCDF file.
    """

    # NOTE: the docstring above is kept as-is because it presumably doubles as CLI help text.
    # `files` is a glob pattern; the first matching file is the template for CRS, extent, dtype and nodata.

    # TODO: add format string template to this to parse out components

    filenames = list(glob.glob(files))
    if not filenames:
        raise click.BadParameter('No files found matching that pattern', param='files', param_hint='FILES')

    z_values = []

    if datetime_pattern is not None:
        datetimes = (datetime.strptime(x, datetime_pattern) for x in filenames)

        # Sort both datetimes and filenames by datetimes
        z_values, filenames = [list(x) for x in zip(*sorted(zip(datetimes, filenames), key=itemgetter(0)))]

    items = tuple(enumerate(filenames))

    has_z = len(filenames) > 1
    if has_z and not z_name:
        raise click.BadParameter('Required when > 1 input file', param='--z', param_hint='--z')

    if src_crs:
        src_crs = CRS.from_string(src_crs)

    # The template stays open for the whole conversion (window_bounds is needed below) and is
    # closed on every exit path by the context manager.
    with rasterio.open(filenames[0]) as template_ds:
        src_crs = template_ds.crs or src_crs

        if not src_crs:
            raise click.BadParameter('Required when no CRS information available in source files', param='--src-crs',
                                     param_hint='--src-crs')

        prj = Proj(**src_crs.to_dict())
        bounds = template_ds.bounds
        width = template_ds.width
        height = template_ds.height
        window = None

        src_dtype = numpy.dtype(template_ds.dtypes[0])
        dtype = numpy.dtype(dtype) if dtype else src_dtype

        if dtype == src_dtype:
            fill_value = template_ds.nodata
            # nodata always comes from rasterio as floating point; it may also be None when the
            # source declares no nodata value, in which case there is nothing to convert.
            if fill_value is not None and src_dtype.kind in ('u', 'i'):
                fill_value = int(fill_value)
        else:
            fill_value = get_fill_value(dtype)

        x_name = x_name or ('lon' if src_crs.is_geographic else 'x')
        y_name = y_name or ('lat' if src_crs.is_geographic else 'y')

        var_kwargs = {
            'fill_value': fill_value
        }

        nc_format = 'NETCDF3_CLASSIC' if netcdf3 else 'NETCDF4'

        with Dataset(output, 'w', format=nc_format) as out:
            if packed or autocrop:
                # First pass over the inputs: gather the value range (for packing) and data windows (for cropping)
                mins = []
                maxs = []
                windows = []

                click.echo('Inspecting input datasets...')
                with click.progressbar(items) as bar:
                    for _, filename in bar:
                        with rasterio.open(filename) as src:
                            data = src.read(1, masked=True)
                            if packed:
                                mins.append(data.min())
                                maxs.append(data.max())
                            if autocrop:
                                data_window = get_data_window(data)
                                if data_window != ((0, height), (0, width)):
                                    windows.append(data_window)

                if packed:
                    min_value = min(mins)
                    max_value = max(maxs)
                    scale, offset = get_pack_atts(dtype, min_value, max_value)
                if autocrop and windows:
                    window = union(windows)
                    bounds = template_ds.window_bounds(window)
                    height = window[0][1] - window[0][0]
                    width = window[1][1] - window[1][0]

            coords = SpatialCoordinateVariables.from_bbox(BBox(bounds, prj), width, height, xy_dtype)
            coords.add_to_dataset(out, x_name, y_name, zlib=compress)

            var_dimensions = [y_name, x_name]
            shape = list(coords.shape)
            if has_z:
                shape.insert(0, len(filenames))
                out.createDimension(z_name, shape[0])
                var_dimensions.insert(0, z_name)
                if z_values:
                    dates = DateVariable(numpy.array(z_values),
                                         units_start_date=z_values[0], calendar=calendar)
                    dates.add_to_dataset(out, z_name)

            click.echo('Creating {0}:{1} with shape {2}'.format(output, variable, shape))

            out_var = out.createVariable(variable, dtype, dimensions=var_dimensions,
                                         zlib=compress, **var_kwargs)
            set_crs(out, variable, prj, set_proj4_att=True)

            if packed:
                out_var.setncattr('scale_factor', scale)
                out_var.setncattr('add_offset', offset)

            click.echo('Copying data from input files...')
            with click.progressbar(items) as bar:
                for index, filename in bar:
                    with rasterio.open(filename) as src:
                        data = src.read(1, masked=True, window=window)

                        if has_z:
                            out_var[index, :] = data
                        else:
                            out_var[:] = data

                    # Flush after each input so progress is persisted incrementally
                    out.sync()
def main(original_file, climatena_file, out_dir, valid_variables):
    """Convert a ClimateNA CSV export into one NetCDF file per climate variable.

    :param original_file: raster opened with rasterio; supplies the bounds, affine transform and grid shape
    :param climatena_file: path to the ClimateNA CSV; every column other than ID1, ID2, Latitude, Longitude and
        Elevation is treated as a variable
    :param out_dir: directory that receives the ``<csv basename>_<variable>.nc`` files
    :param valid_variables: optional comma-separated list of variable names to convert (case-insensitive);
        all variables are converted when empty or None
    """

    with rasterio.open(original_file) as ds:
        bounds = ds.bounds
        affine = ds.transform
        shape = ds.shape

    with open(climatena_file, 'r') as f_in:
        headers = csv.DictReader(f_in).fieldnames
        variables = [x for x in headers if x not in ('ID1', 'ID2', 'Latitude', 'Longitude', 'Elevation')]

    # Membership is tested against lower-cased names below, so `valid` must always hold lower-cased
    # names; otherwise mixed-case headers are silently skipped when no filter is given.
    if valid_variables:
        valid = {x.strip().lower() for x in valid_variables.split(',')}
    else:
        valid = {x.lower() for x in variables}

    file_root = os.path.splitext(os.path.basename(climatena_file))[0]

    def out_path_for(var):
        return os.path.join(out_dir, '{}_{}.nc'.format(file_root, var))

    print('Creating datasets...')

    # Fully-masked int32 template grid written to every new output file
    grid = numpy.zeros(shape, dtype='int32')
    grid = numpy.ma.masked_where(grid == 0, grid)

    for var in (x for x in variables if x.lower() in valid):
        out_path = out_path_for(var)

        # Existing outputs are kept and later updated in place
        if os.path.exists(out_path):
            continue

        with Dataset(out_path, 'w', format='NETCDF4') as ds:
            projection = Proj('EPSG:4326')
            coord_vars = SpatialCoordinateVariables.from_bbox(
                BBox(bounds, projection=projection), *reversed(grid.shape)
            )
            coord_vars.add_to_dataset(ds, 'longitude', 'latitude')
            data_var = ds.createVariable(
                var, grid.dtype, dimensions=('latitude', 'longitude'), fill_value=grid.fill_value
            )
            data_var[:] = grid
            set_crs(ds, var, projection)

    usecols = [headers.index(x) for x in ['Latitude', 'Longitude'] + variables]

    print('Copying from ClimateNA data... (0%)', end='\r')
    with open(climatena_file, 'r') as f_in:
        f_in.seek(0, os.SEEK_END)
        end = f_in.tell()
        f_in.seek(0)
        f_in.readline()  # Skip headers

        while f_in.tell() < end:
            # Process the CSV in chunks of up to one million rows
            lines = ''.join(f_in.readline() for _ in range(1000000))

            # ndmin=2 keeps the (rows, columns) shape even when the chunk holds a single row
            arr = numpy.loadtxt(StringIO(lines), delimiter=',', usecols=usecols, ndmin=2)
            arr = numpy.moveaxis(arr, 1, 0)

            latitudes = arr[0]
            longitudes = arr[1]

            for i, var in enumerate(variables):
                if var.lower() not in valid:
                    continue

                variable = arr[i + 2]  # first two columns are latitude / longitude

                with Dataset(out_path_for(var), 'a') as ds:
                    grid = ds.variables[var][:]
                    fill_value = grid.fill_value
                    grid = grid.data

                    for j, value in enumerate(variable):
                        # -9999 marks a missing value in the CSV
                        if value == -9999:
                            continue

                        # Inverse affine maps (lon, lat) to fractional (col, row)
                        col, row = [int(round(x)) for x in ~affine * (longitudes[j], latitudes[j])]

                        if var in MULTIPLIERS:
                            value *= MULTIPLIERS[var]

                        grid[row][col] = value

                    ds.variables[var][:] = numpy.ma.masked_where(grid == fill_value, grid)

            print('Copying from ClimateNA data... ({}%)'.format(round(f_in.tell() / end * 100)), end='\r')

    print('Copying from ClimateNA data... (100%)')
    print('Done.')
def warp_like(ds, ds_projection, variables, out_ds, template_ds, template_varname, resampling=Resampling.nearest):
    """
    Reproject variables of a NetCDF dataset onto the grid of a template NetCDF variable.

    :param ds: source dataset
    :param ds_projection: source dataset coordinate reference system, proj4 string or EPSG:NNNN code
    :param variables: list of variable names in source dataset to warp
    :param out_ds: output dataset.  Must be opened in write or append mode.
    :param template_ds: template dataset
    :param template_varname: variable name for template data variable in template dataset
    :param resampling: resampling method.  See rasterio.enums.Resampling for options
    """

    template_variable = template_ds.variables[template_varname]
    template_prj = Proj(get_crs(template_ds, template_varname))
    template_mask = template_variable[:].mask

    # The last two dimensions are taken as (y, x)
    template_y_name, template_x_name = template_variable.dimensions[-2:]
    template_coords = SpatialCoordinateVariables.from_dataset(
        template_ds,
        x_name=template_x_name,
        y_name=template_y_name,
        projection=template_prj
    )
    # template_geo_bbox = template_coords.bbox.project(ds_prj, edge_points=21)  # TODO: add when needing to subset

    ds_y_name, ds_x_name = ds.variables[variables[0]].dimensions[-2:]
    if 'EPSG:' in ds_projection.upper():
        proj = Proj(init=ds_projection)
    else:
        proj = Proj(str(ds_projection))
    ds_coords = SpatialCoordinateVariables.from_dataset(ds, x_name=ds_x_name, y_name=ds_y_name, projection=proj)

    with rasterio.Env():
        # Copy dimensions for variable across to output
        for name in template_variable.dimensions:
            if name in out_ds.dimensions:
                continue
            if name in template_ds.variables and name not in out_ds.variables:
                copy_variable(template_ds, out_ds, name)
            else:
                copy_dimension(template_ds, out_ds, name)

        for variable_name in variables:
            click.echo('Processing: {0}'.format(variable_name))

            variable = ds.variables[variable_name]
            fill_value = getattr(variable, '_FillValue', variable[0, 0].fill_value)

            # Carry over any leading (non-spatial) dimensions of the source variable
            leading_dimensions = variable.dimensions[:-2]
            for name in leading_dimensions:
                if name in out_ds.dimensions:
                    continue
                if name in ds.variables:
                    copy_variable(ds, out_ds, name)
                else:
                    copy_dimension(ds, out_ds, name)

            out_var = out_ds.createVariable(
                variable_name,
                variable.dtype,
                dimensions=leading_dimensions + template_variable.dimensions,
                fill_value=fill_value
            )

            reproject_kwargs = {
                'src_transform': ds_coords.affine,
                'src_crs': CRS.from_string(ds_projection),
                'dst_transform': template_coords.affine,
                'dst_crs': template_prj.srs,
                'resampling': resampling,
                'src_nodata': fill_value,
                'dst_nodata': fill_value,
                'threads': 4
            }

            def warp_slice(data):
                # Warp one 2D array onto the template grid, starting from the template mask
                warped = numpy.ma.empty(template_coords.shape, dtype=data.dtype)
                warped.mask = template_mask
                warped.fill(fill_value)
                reproject(data, warped, **reproject_kwargs)
                return warped

            # TODO: may only need to select out what is in window

            if len(variable.shape) != 3:
                out_var[:] = warp_slice(variable[:])
            else:
                with click.progressbar(range(variable.shape[0])) as bar:
                    for i in bar:
                        # print('processing slice: {0}'.format(i))
                        out_var[i, :] = warp_slice(variable[i, :])
def to_netcdf(
        files,
        output,
        variable,
        dtype,
        src_crs,
        x_name,
        y_name,
        z_name,
        datetime_pattern,
        netcdf3,
        compress,
        packed,
        xy_dtype,
        # z_dtype,
        calendar,
        autocrop):
    """
    Convert rasters to NetCDF and stack them according to a dimension.

    X and Y dimension names will be named according to the source projection (lon, lat if geographic projection, x, y
    otherwise) unless specified.

    Will overwrite an existing NetCDF file.

    Only the first band of the input will be turned into a NetCDF file.
    """

    # NOTE: the docstring above is kept as-is because it presumably doubles as CLI help text.
    # `files` is a glob pattern; the first matching file is the template for CRS, extent, dtype and nodata.

    # TODO: add format string template to this to parse out components

    filenames = list(glob.glob(files))
    if not filenames:
        raise click.BadParameter('No files found matching that pattern', param='files', param_hint='FILES')

    z_values = []

    if datetime_pattern is not None:
        datetimes = (datetime.strptime(x, datetime_pattern) for x in filenames)

        # Sort both datetimes and filenames by datetimes
        z_values, filenames = [list(x) for x in zip(*sorted(zip(datetimes, filenames), key=itemgetter(0)))]

    items = tuple(enumerate(filenames))

    has_z = len(filenames) > 1
    if has_z and not z_name:
        raise click.BadParameter('Required when > 1 input file', param='--z', param_hint='--z')

    if src_crs:
        src_crs = CRS.from_string(src_crs)

    # The template stays open for the whole conversion (window_bounds is needed below) and is
    # closed on every exit path by the context manager.
    with rasterio.open(filenames[0]) as template_ds:
        src_crs = template_ds.crs or src_crs

        if not src_crs:
            raise click.BadParameter('Required when no CRS information available in source files', param='--src-crs',
                                     param_hint='--src-crs')

        prj = Proj(**src_crs.to_dict())
        bounds = template_ds.bounds
        width = template_ds.width
        height = template_ds.height
        window = None

        src_dtype = numpy.dtype(template_ds.dtypes[0])
        dtype = numpy.dtype(dtype) if dtype else src_dtype

        if dtype == src_dtype:
            fill_value = template_ds.nodata
            # nodata always comes from rasterio as floating point; it may also be None when the
            # source declares no nodata value, in which case there is nothing to convert.
            if fill_value is not None and src_dtype.kind in ('u', 'i'):
                fill_value = int(fill_value)
        else:
            fill_value = get_fill_value(dtype)

        x_name = x_name or ('lon' if src_crs.is_geographic else 'x')
        y_name = y_name or ('lat' if src_crs.is_geographic else 'y')

        var_kwargs = {
            'fill_value': fill_value
        }

        nc_format = 'NETCDF3_CLASSIC' if netcdf3 else 'NETCDF4'

        with Dataset(output, 'w', format=nc_format) as out:
            if packed or autocrop:
                # First pass over the inputs: gather the value range (for packing) and data windows (for cropping)
                mins = []
                maxs = []
                windows = []

                click.echo('Inspecting input datasets...')
                with click.progressbar(items) as bar:
                    for _, filename in bar:
                        with rasterio.open(filename) as src:
                            data = src.read(1, masked=True)
                            if packed:
                                mins.append(data.min())
                                maxs.append(data.max())
                            if autocrop:
                                data_window = get_data_window(data)
                                if data_window != ((0, height), (0, width)):
                                    windows.append(data_window)

                if packed:
                    min_value = min(mins)
                    max_value = max(maxs)
                    scale, offset = get_pack_atts(dtype, min_value, max_value)
                if autocrop and windows:
                    window = union(windows)
                    bounds = template_ds.window_bounds(window)
                    height = window[0][1] - window[0][0]
                    width = window[1][1] - window[1][0]

            coords = SpatialCoordinateVariables.from_bbox(BBox(bounds, prj), width, height, xy_dtype)
            coords.add_to_dataset(out, x_name, y_name, zlib=compress)

            var_dimensions = [y_name, x_name]
            shape = list(coords.shape)
            if has_z:
                shape.insert(0, len(filenames))
                out.createDimension(z_name, shape[0])
                var_dimensions.insert(0, z_name)
                if z_values:
                    dates = DateVariable(numpy.array(z_values),
                                         units_start_date=z_values[0], calendar=calendar)
                    dates.add_to_dataset(out, z_name)

            click.echo('Creating {0}:{1} with shape {2}'.format(output, variable, shape))

            out_var = out.createVariable(variable, dtype, dimensions=var_dimensions,
                                         zlib=compress, **var_kwargs)
            set_crs(out, variable, prj, set_proj4_att=True)

            if packed:
                out_var.setncattr('scale_factor', scale)
                out_var.setncattr('add_offset', offset)

            click.echo('Copying data from input files...')
            with click.progressbar(items) as bar:
                for index, filename in bar:
                    with rasterio.open(filename) as src:
                        data = src.read(1, masked=True, window=window)

                        if has_z:
                            out_var[index, :] = data
                        else:
                            out_var[:] = data

                    # Flush after each input so progress is persisted incrementally
                    out.sync()
def zones(
        input,
        output,
        variable,
        attribute,
        like,
        netcdf3,
        zip):
    """
    Create zones in a NetCDF from features in a shapefile.  This is intended to be used as input to zonal statistics
    functions; it is not intended as a direct replacement for rasterizing geometries into NetCDF.

    Only handles < 65,535 features for now.

    If --attribute is provided, any features that do not have this will not be assigned to zones.

    A values lookup will be used to store values.  The zones are indices of the unique values encountered when
    extracting features.
    The original values are stored in an additional variable with the name of the zones variable plus '_values'.

    Template NetCDF dataset must have a valid projection defined or be inferred from dimensions (e.g., lat / long).
    """

    # NOTE: the docstring above is kept as-is because it presumably doubles as CLI help text.

    with Dataset(like) as template_ds:
        # The first data variable of the template defines the target grid
        template_varname = list(data_variables(template_ds).keys())[0]
        template_variable = template_ds.variables[template_varname]
        template_crs = get_crs(template_ds, template_varname)

        if template_crs:
            template_crs = CRS.from_string(template_crs)
        elif is_geographic(template_ds, template_varname):
            template_crs = CRS({'init': 'EPSG:4326'})
        else:
            raise click.UsageError('template dataset must have a valid projection defined')

        spatial_dimensions = template_variable.dimensions[-2:]
        out_shape = template_variable.shape[-2:]

        template_y_name, template_x_name = spatial_dimensions
        coords = SpatialCoordinateVariables.from_dataset(
            template_ds,
            x_name=template_x_name,
            y_name=template_y_name,
            projection=Proj(**template_crs.to_dict())
        )

    with fiona.open(input, 'r') as shp:
        if attribute:
            if attribute not in shp.meta['schema']['properties']:
                raise click.BadParameter('{0} not found in dataset'.format(attribute),
                                         param='--attribute', param_hint='--attribute')

            att_dtype = shp.meta['schema']['properties'][attribute].split(':')[0]
            if att_dtype not in ('int', 'str'):
                raise click.BadParameter('integer or string attribute required',
                                         param='--attribute', param_hint='--attribute')

        transform_required = CRS(shp.crs) != template_crs
        geometries = []
        unique_values = []   # zone index -> original value, in order of first appearance
        index_by_value = {}  # original value -> zone index

        # Project bbox for filtering
        bbox = coords.bbox
        if transform_required:
            bbox = bbox.project(Proj(**shp.crs), edge_points=21)

        for f in shp.filter(bbox=bbox.as_list()):
            value = f['properties'].get(attribute) if attribute else int(f['id'])
            if value is None:
                # Features without a value are not rasterized
                continue

            geom = f['geometry']
            if transform_required:
                geom = transform_geom(shp.crs, template_crs, geom)

            # Every feature sharing a value must be burned with that value's zone index, not with
            # the next unassigned index.
            if value not in index_by_value:
                index_by_value[value] = len(unique_values)
                unique_values.append(value)

            geometries.append((geom, index_by_value[value]))

    num_geometries = len(geometries)
    # Save a slot at the end for nodata
    if num_geometries < 255:
        dtype = numpy.dtype('uint8')
    elif num_geometries < 65535:
        dtype = numpy.dtype('uint16')
    else:
        raise click.UsageError('Too many features to rasterize: {0}, Exceptioning...'.format(num_geometries))

    fill_value = get_fill_value(dtype)

    click.echo('Rasterizing {0} features into zones'.format(num_geometries))

    with rasterio.Env():
        zones = rasterize(
            geometries,
            out_shape=out_shape,
            transform=coords.affine,
            all_touched=False,  # True produces undesirable results for adjacent polygons
            fill=fill_value,
            dtype=dtype
        )

    nc_format = 'NETCDF4'
    out_dtype = dtype
    if netcdf3:
        # The unsigned types are widened to the next signed type for the NETCDF3_CLASSIC output
        nc_format = 'NETCDF3_CLASSIC'
        if dtype == numpy.uint8:
            out_dtype = numpy.dtype('int16')
        elif dtype == numpy.uint16:
            out_dtype = numpy.dtype('int32')

        # Have to convert fill_value to mask since we changed data type
        zones = numpy.ma.masked_array(zones, mask=(zones == fill_value))

    with Dataset(output, 'w', format=nc_format) as out:
        values_varname = '{0}_values'.format(variable)
        coords.add_to_dataset(out, template_x_name, template_y_name)
        out_var = out.createVariable(variable, out_dtype,
                                     dimensions=spatial_dimensions,
                                     zlib=zip,
                                     fill_value=get_fill_value(out_dtype))
        out_var.setncattr('values', values_varname)
        out_var[:] = zones

        out_values = numpy.array(unique_values)
        if netcdf3 and out_values.dtype == numpy.int64:
            # int64 lookup values are written as int32 for the NETCDF3_CLASSIC output
            out_values = out_values.astype('int32')

        out.createDimension(values_varname, len(out_values))
        values_var = out.createVariable(values_varname, out_values.dtype,
                                        dimensions=(values_varname, ),
                                        zlib=zip)
        values_var[:] = out_values
def mask(
        input,
        output,
        variable,
        like,
        netcdf3,
        all_touched,
        invert,
        zip):
    """
    Create a NetCDF mask from a shapefile.

    Values are equivalent to a numpy mask: 0 for unmasked areas, and 1 for masked areas.

    Template NetCDF dataset must have a valid projection defined or be inferred from dimensions (e.g., lat / long)
    """

    # NOTE: the docstring above is kept as-is because it presumably doubles as CLI help text.

    with Dataset(like) as template_ds:
        # The first data variable of the template defines the target grid.  list() is required
        # because dict views are not subscriptable on Python 3.
        template_varname = list(data_variables(template_ds).keys())[0]
        template_variable = template_ds.variables[template_varname]
        template_crs = get_crs(template_ds, template_varname)

        if template_crs:
            template_crs = CRS.from_string(template_crs)
        elif is_geographic(template_ds, template_varname):
            template_crs = CRS({'init': 'EPSG:4326'})
        else:
            raise click.UsageError('template dataset must have a valid projection defined')

        spatial_dimensions = template_variable.dimensions[-2:]
        mask_shape = template_variable.shape[-2:]

        template_y_name, template_x_name = spatial_dimensions
        coords = SpatialCoordinateVariables.from_dataset(
            template_ds,
            x_name=template_x_name,
            y_name=template_y_name,
            projection=Proj(**template_crs.to_dict())
        )

    with fiona.open(input, 'r') as shp:
        transform_required = CRS(shp.crs) != template_crs

        # Project bbox for filtering
        bbox = coords.bbox
        if transform_required:
            bbox = bbox.project(Proj(**shp.crs), edge_points=21)

        geometries = []
        for f in shp.filter(bbox=bbox.as_list()):
            geom = f['geometry']
            if transform_required:
                geom = transform_geom(shp.crs, template_crs, geom)

            geometries.append(geom)

    click.echo('Converting {0} features to mask'.format(len(geometries)))

    # fill_value is the background; default_value is burned where features are
    if invert:
        fill_value = 0
        default_value = 1
    else:
        fill_value = 1
        default_value = 0

    with rasterio.Env():
        # Rasterize features to 0, leaving background as 1
        mask_data = rasterize(
            geometries,
            out_shape=mask_shape,
            transform=coords.affine,
            all_touched=all_touched,
            fill=fill_value,
            default_value=default_value,
            dtype=numpy.uint8
        )

    nc_format = 'NETCDF3_CLASSIC' if netcdf3 else 'NETCDF4'
    # int8 is written for the NETCDF3_CLASSIC output, uint8 otherwise
    dtype = 'int8' if netcdf3 else 'uint8'

    with Dataset(output, 'w', format=nc_format) as out:
        coords.add_to_dataset(out, template_x_name, template_y_name)
        out_var = out.createVariable(variable, dtype, dimensions=spatial_dimensions, zlib=zip,
                                     fill_value=get_fill_value(dtype))
        out_var[:] = mask_data
def render_netcdf(
        filename_pattern,
        variable,
        output_directory,
        renderer_file,
        save_file,
        renderer_type,
        colormap,
        fill,
        colorspace,
        palette,
        palette_stretch,
        scale,
        id_variable,
        lh,
        legend_breaks,
        legend_ticks,
        legend_precision,
        format,
        src_crs,
        dst_crs,
        res,
        resampling,
        anchors,
        interactive_map,
        mask_path):
    """
    Render netcdf files to images.

    colormap is ignored if renderer_file is provided

    --dst-crs is ignored if using --map option (always uses EPSG:3857)

    If no colormap or palette is provided, a default palette may be chosen based on the name of the variable.

    If provided, mask must be 1 for areas to be masked out, and 0 otherwise.  It must be in the same CRS as the input
    datasets, and have the same spatial dimensions.

    """

    # NOTE: the docstring above presumably doubles as CLI help text, so only its unbalanced parenthesis was fixed.

    # Parameter overrides
    if interactive_map:
        dst_crs = 'EPSG:3857'

    filenames = glob.glob(filename_pattern)
    if not filenames:
        raise click.BadParameter('No files found matching that pattern', param='filename_pattern',
                                 param_hint='FILENAME_PATTERN')

    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    mask = get_mask(mask_path) if mask_path is not None else None

    if renderer_file is not None and not save_file:
        # Load a previously saved renderer definition
        if not os.path.exists(renderer_file):
            raise click.BadParameter('does not exist', param='renderer_file', param_hint='renderer_file')

        # see https://bitbucket.org/databasin/ncdjango/wiki/Home for format
        with open(renderer_file) as renderer_fp:
            renderer_dict = json.loads(renderer_fp.read())

        # The file may hold either a single renderer or a mapping of variable name -> renderer
        if variable in renderer_dict and 'colors' not in renderer_dict:
            renderer_dict = renderer_dict[variable]

        renderer_type = renderer_dict['type']
        if renderer_type == 'stretched':
            colors = ','.join([str(c[0]) for c in renderer_dict['colors']])
            if 'min' in colors or 'max' in colors or 'mean' in colors:
                # Resolve symbolic stops ('min', 'max', 'mean', or '<factor>*<statistic>') to numbers
                statistics = collect_statistics(filenames, (variable,), mask=mask)[variable]
                for entry in renderer_dict['colors']:
                    # NOTE(review): `basestring` exists only on Python 2 unless the file provides a
                    # compatibility alias - confirm against the module imports.
                    if isinstance(entry[0], basestring):
                        if entry[0] in ('min', 'max', 'mean'):
                            entry[0] = statistics[entry[0]]
                        elif '*' in entry[0]:
                            rel_value, statistic = entry[0].split('*')
                            entry[0] = float(rel_value) * statistics[statistic]

        renderer = renderer_from_dict(renderer_dict)

    else:
        # Build the renderer from the command-line options
        if renderer_type == 'stretched':
            if palette is not None:
                renderer = palette_to_stretched_renderer(palette, palette_stretch, filenames, variable,
                                                         fill_value=fill, mask=mask)

            elif colormap is None and variable in DEFAULT_PALETTES:
                palette, palette_stretch = DEFAULT_PALETTES[variable]
                renderer = palette_to_stretched_renderer(palette, palette_stretch, filenames, variable,
                                                         fill_value=fill, mask=mask)

            else:
                if colormap is None:
                    colormap = 'min:#000000,max:#FFFFFF'
                renderer = colormap_to_stretched_renderer(colormap, colorspace, filenames, variable,
                                                          fill_value=fill, mask=mask)

        elif renderer_type == 'classified':
            if not palette:
                raise click.BadParameter('palette required for classified (for now)',
                                         param='--palette', param_hint='--palette')

            renderer = palette_to_classified_renderer(palette, filenames, variable, method='equal',
                                                      fill_value=fill, mask=mask)  # TODO: other methods

    if save_file:
        # Persist the renderer, merging into an existing file when there is one
        if os.path.exists(save_file):
            with open(save_file, 'r+') as output_file:
                data = json.loads(output_file.read())
                output_file.seek(0)
                output_file.truncate()
                data[variable] = renderer.serialize()
                output_file.write(json.dumps(data, indent=4))
        else:
            with open(save_file, 'w') as output_file:
                output_file.write(json.dumps({variable: renderer.serialize()}))

    if renderer_type == 'stretched':
        if legend_ticks is not None and not legend_breaks:
            legend_ticks = [float(v) for v in legend_ticks.split(',')]

        legend = renderer.get_legend(image_height=lh, breaks=legend_breaks, ticks=legend_ticks,
                                     max_precision=legend_precision)[0].to_image()

    elif renderer_type == 'classified':
        legend = composite_elements(renderer.get_legend())

    legend.save(os.path.join(output_directory, '{0}_legend.png'.format(variable)))

    # The first file supplies the dimensions, CRS and coordinates that all files are expected to share
    with Dataset(filenames[0]) as ds:
        var_obj = ds.variables[variable]
        dimensions = var_obj.dimensions
        shape = var_obj.shape
        num_dimensions = len(shape)

        if num_dimensions == 3:
            if id_variable:
                if shape[0] != ds.variables[id_variable][:].shape[0]:
                    raise click.BadParameter('must be same dimensionality as 3rd dimension of {0}'.format(variable),
                                             param='--id_variable', param_hint='--id_variable')
            else:
                # Guess from the 3rd dimension
                guess = dimensions[0]
                if guess in ds.variables and ds.variables[guess][:].shape[0] == shape[0]:
                    id_variable = guess

        ds_crs = get_crs(ds, variable)
        if not ds_crs and is_geographic(ds, variable):
            ds_crs = 'EPSG:4326'  # Assume all geographic data is WGS84

        # CRS found in the dataset wins over the src_crs option
        src_crs = CRS.from_string(ds_crs) if ds_crs else CRS({'init': src_crs}) if src_crs else None

        # get transforms, assume last 2 dimensions on variable are spatial in row, col order
        y_dim, x_dim = dimensions[-2:]
        coords = SpatialCoordinateVariables.from_dataset(
            ds, x_dim, y_dim, projection=Proj(src_crs.to_dict()) if src_crs else None
        )

    if mask is not None and not mask.shape == shape[-2:]:
        # Will likely break before this if collecting statistics
        raise click.BadParameter(
            'mask variable shape does not match shape of input spatial dimensions',
            param='--mask', param_hint='--mask'
        )

    flip_y = False
    reproject_kwargs = None
    if dst_crs is not None:
        if not src_crs:
            raise click.BadParameter('must provide src_crs to reproject',
                                     param='--src-crs',
                                     param_hint='--src-crs')

        dst_crs = CRS.from_string(dst_crs)

        src_height, src_width = coords.shape
        dst_transform, dst_width, dst_height = calculate_default_transform(
            src_crs, dst_crs, src_width, src_height,
            *coords.bbox.as_list(), resolution=res
        )

        reproject_kwargs = {
            'src_crs': src_crs,
            'src_transform': coords.affine,
            'dst_crs': dst_crs,
            'dst_transform': dst_transform,
            'resampling': getattr(Resampling, resampling),
            'dst_shape': (dst_height, dst_width)
        }

    else:
        dst_transform = coords.affine
        dst_height, dst_width = coords.shape
        dst_crs = src_crs

        if coords.y.is_ascending_order():
            # Only needed if we are not already reprojecting the data, since that will flip it automatically
            flip_y = True

    if anchors or interactive_map:
        if not (dst_crs or src_crs):
            raise click.BadParameter('must provide at least src_crs to get Leaflet anchors or interactive map',
                                     param='--src-crs', param_hint='--src-crs')

        leaflet_anchors = get_leaflet_anchors(BBox.from_affine(dst_transform, dst_width, dst_height,
                                                               projection=Proj(dst_crs) if dst_crs else None))

        if anchors:
            click.echo('Anchors: {0}'.format(leaflet_anchors))

    layers = {}  # image name without extension -> image filename
    for filename in filenames:
        with Dataset(filename) as ds:
            click.echo('Processing {0}'.format(filename))

            filename_root = os.path.split(filename)[1].replace('.nc', '')

            if variable not in ds.variables:
                raise click.BadParameter('variable {0} was not found in file: {1}'.format(variable, filename),
                                         param='variable', param_hint='VARIABLE')

            var_obj = ds.variables[variable]
            if not var_obj.dimensions == dimensions:
                raise click.ClickException('All datasets must have the same dimensions for {0}'.format(variable))

            if num_dimensions == 2:
                data = var_obj[:]
                if mask is not None:
                    data = numpy.ma.masked_array(data, mask=mask)
                image_filename = os.path.join(output_directory,
                                              '{0}_{1}.{2}'.format(filename_root, variable, format))
                if reproject_kwargs:
                    data = warp_array(data, **reproject_kwargs)
                render_image(renderer, data, image_filename, scale, flip_y=flip_y, format=format)

                local_filename = os.path.split(image_filename)[1]
                layers[os.path.splitext(local_filename)[0]] = local_filename

            elif num_dimensions == 3:
                # One image per index along the first dimension
                for index in range(shape[0]):
                    slice_id = ds.variables[id_variable][index] if id_variable is not None else index
                    image_filename = os.path.join(output_directory,
                                                  '{0}_{1}__{2}.{3}'.format(filename_root, variable, slice_id, format))
                    data = var_obj[index]
                    if mask is not None:
                        data = numpy.ma.masked_array(data, mask=mask)
                    if reproject_kwargs:
                        data = warp_array(data, **reproject_kwargs)
                    render_image(renderer, data, image_filename, scale, flip_y=flip_y, format=format)

                    local_filename = os.path.split(image_filename)[1]
                    layers[os.path.splitext(local_filename)[0]] = local_filename

            # TODO: variables with more than 3 dimensions are not rendered.  An earlier, untested
            # implementation iterated over every combination of the leading dimension indices
            # (optionally labelled through a comma-separated list of id variables) and rendered each
            # 2D slice; it needs to be re-validated before being restored.

    if interactive_map:
        index_html = os.path.join(output_directory, 'index.html')
        with open(index_html, 'w') as out:
            template = Environment(loader=PackageLoader('trefoil.cli')).get_template('map.html')
            out.write(
                template.render(
                    layers=json.dumps(layers),
                    bounds=str(leaflet_anchors),
                    variable=variable
                )
            )

        webbrowser.open(index_html)
def handle(self, *args, **options):
    """Rewrite each single-variable integer service using the smallest integer dtype that fits.

    Asks for interactive confirmation first.  Services with more than one variable, with
    non-integer data, or already stored in the smallest type are skipped.  Each converted file
    is written to a temporary directory and then copied over the original.
    """

    message = (
        "WARNING: This will update all service data, casting each to it's smallest possible data type. Do you want "
        "to continue? [y/n]"
    )
    if input(message).lower() not in {'y', 'yes'}:
        return

    for service in Service.objects.all():
        if service.variable_set.all().count() > 1:
            print("Skipping service '{}' with more than one variable...".format(service.name))
            continue

        variable = service.variable_set.all().get()
        path = os.path.join(SERVICE_DATA_ROOT, service.data_path)

        tmp_dir = mkdtemp()
        tmp_path = os.path.join(tmp_dir, os.path.basename(service.data_path))

        try:
            with Dataset(path, 'r') as ds:
                data = ds.variables[variable.variable][:]
                coords = SpatialCoordinateVariables.from_bbox(service.full_extent, *reversed(data.shape))

            if data.dtype.kind != 'i':
                print("Ignoring service '{}' with non-int type".format(service.name))
                continue

            # The fill value will be the minimum value of the chosen type, so we want to make sure it's not
            # included in the actual data range
            min_value = data.min() - 1
            max_value = data.max()

            # Determine the most suitable data type: the smallest type that can represent BOTH
            # ends of the range.  promote_types handles the mixed signed/unsigned case, e.g.
            # (int8, uint8) -> int16, where taking the larger bit width alone would overflow.
            promoted = numpy.promote_types(
                numpy.min_scalar_type(min_value), numpy.min_scalar_type(max_value)
            )
            if promoted.kind not in ('i', 'u'):
                # e.g. int8 with uint64 promotes to a float type; no integer type fits
                print("Ignoring service '{}': no integer type fits its value range".format(service.name))
                continue

            dtype = str(promoted)

            if data.dtype == dtype:
                print("Service '{}' already has the smallest possible type: {}".format(service.name, dtype))
                continue

            print("Converting service '{}' to type: {}".format(service.name, dtype))

            with Dataset(tmp_path, 'w', format='NETCDF4') as ds:
                coords.add_to_dataset(ds, variable.x_dimension, variable.y_dimension)

                data = data.astype(dtype)
                # numpy.ma.maximum_fill_value returns the MINIMUM representable value of the dtype
                fill_value = numpy.ma.maximum_fill_value(numpy.dtype(dtype))
                numpy.ma.set_fill_value(data, fill_value)

                data_var = ds.createVariable(
                    variable.variable, dtype, dimensions=(variable.y_dimension, variable.x_dimension),
                    fill_value=fill_value
                )
                data_var[:] = data

                set_crs(ds, variable.variable, service.full_extent.projection)

            # Replace the original only after the new file has been fully written
            os.unlink(path)
            shutil.copy2(tmp_path, path)

        finally:
            # Best-effort cleanup of the temporary directory (also runs on `continue`)
            try:
                shutil.rmtree(tmp_dir)
            except OSError:
                pass
def process_web_outputs(results, job, publish_raster_results=False, renderer_or_fn=None):
    """Convert job results into web-friendly output values.

    Raster outputs are (optionally) written to NetCDF and published as services; the output value
    is replaced by the service name.  ndarray outputs are converted to a list, or to their string
    representation when larger than numpy's print threshold.  Other values are left untouched.

    :param results: object whose ``format_args()`` returns the mapping of output name -> value
    :param job: job the outputs belong to; its ``uuid`` namespaces the published services
    :param publish_raster_results: when true, raster outputs are published as services
    :param renderer_or_fn: a renderer, a callable taking the raster and returning a renderer, or
        None for a black-to-white stretched renderer over the raster's min/max
    :return: the (mutated) outputs mapping
    """

    outputs = results.format_args()

    # Only values of existing keys are replaced, so iterating while assigning is safe
    for k, v in outputs.items():
        if is_raster(v) and publish_raster_results:
            service_name = '{0}/{1}'.format(job.uuid, k)
            rel_path = '{}.nc'.format(service_name)
            abs_path = os.path.join(SERVICE_DATA_ROOT, rel_path)

            # All rasters of a job share one directory, so it may already exist from a previous output
            out_dir = os.path.dirname(abs_path)
            if not os.path.isdir(out_dir):
                os.makedirs(out_dir)

            with Dataset(abs_path, 'w', format='NETCDF4') as ds:
                if is_latlong(v.extent.projection):
                    x_var = 'longitude'
                    y_var = 'latitude'
                else:
                    x_var = 'x'
                    y_var = 'y'

                coord_vars = SpatialCoordinateVariables.from_bbox(v.extent, *reversed(v.shape))
                coord_vars.add_to_dataset(ds, x_var, y_var)

                fill_value = v.fill_value if numpy.ma.core.is_masked(v) else None
                data_var = ds.createVariable('data', v.dtype, dimensions=(y_var, x_var), fill_value=fill_value)
                data_var[:] = v
                set_crs(ds, 'data', v.extent.projection)

            if callable(renderer_or_fn):
                renderer = renderer_or_fn(v)
            elif renderer_or_fn is None:
                renderer = StretchedRenderer([
                    (numpy.min(v).item(), Color(0, 0, 0)),
                    (numpy.max(v).item(), Color(255, 255, 255))
                ])
            else:
                renderer = renderer_or_fn

            # Create the service, its variable and the job link atomically
            with transaction.atomic():
                service = Service.objects.create(
                    name=service_name,
                    description=(
                        'This service has been automatically generated from the result of a geoprocessing job.'
                    ),
                    data_path=rel_path,
                    projection=v.extent.projection.srs,
                    full_extent=v.extent,
                    initial_extent=v.extent,
                )
                Variable.objects.create(
                    service=service,
                    index=0,
                    variable='data',
                    projection=v.extent.projection.srs,
                    x_dimension=x_var,
                    y_dimension=y_var,
                    name='data',
                    renderer=renderer,
                    full_extent=v.extent
                )
                ProcessingResultService.objects.create(job=job, service=service)

            outputs[k] = service_name

        elif is_ndarray(v):
            if v.size < numpy.get_printoptions()['threshold']:
                outputs[k] = v.tolist()
            else:
                outputs[k] = str(v)

    return outputs
def handle(self, datasets, directory, overwrite, *args, **options):
    """Publish NetCDF files as services and copy them into ``SERVICE_DIR``.

    For each path in ``datasets`` a ``Service`` is created (named after the
    file, optionally prefixed with ``directory``) together with one
    ``Variable`` per dataset variable that has a spatial grid. Afterwards the
    data files of overwritten services are removed and the new files are
    copied into ``SERVICE_DIR / directory``.

    Parameters
    ----------
    datasets : iterable of paths to NetCDF files
    directory : str or None
        Sub-directory (relative to ``SERVICE_DIR``) used for names and storage.
    overwrite : bool
        Replace an existing service with the same (case-insensitive) name
        instead of failing.

    Raises
    ------
    CommandError
        If a service with the same name exists and ``overwrite`` is false, or
        if a dataset has no variable with a spatial grid.
    """
    old_files = []

    for dataset in datasets:
        filename = os.path.basename(dataset)
        name = os.path.splitext(filename)[0]

        if directory is not None:
            prefix = directory.strip('/')
            filename = '{}/{}'.format(prefix, filename)
            name = '{}/{}'.format(prefix, name)

        with transaction.atomic():
            existing = Service.objects.filter(name__iexact=name)
            if existing.exists():
                if not overwrite:
                    raise CommandError("A service named '{}' already exists".format(name))
                # Remember the old data file; it is removed once all services are created.
                old_files.append(os.path.join(SERVICE_DIR, existing.get().data_path))
                existing.delete()

            with Dataset(dataset, 'r') as ds:
                # (variable name, variable description) for every gridded variable.
                variables = []
                x_dimension = None
                y_dimension = None
                projection = None

                desc = describe(ds)
                for variable, variable_info in desc['variables'].items():
                    if 'spatial_grid' in variable_info:
                        variables.append((variable, variable_info))
                        spatial_grid = variable_info['spatial_grid']
                        # The grid of the last gridded variable is used for the whole service.
                        x_dimension = spatial_grid['x_dimension']
                        y_dimension = spatial_grid['y_dimension']
                        projection = Proj(variable_info['proj4'])

                if not variables:
                    raise CommandError('No usable variables found')

                coords = SpatialCoordinateVariables.from_dataset(
                    ds, x_dimension, y_dimension, projection=projection)

                service = Service.objects.create(
                    name=name,
                    data_path=filename,
                    projection=coords.projection,
                    full_extent=coords.bbox,
                    initial_extent=coords.bbox
                )

                for variable, info in variables:
                    # Stretch each renderer over this variable's own range; the
                    # loop variable left over from the discovery loop above
                    # describes whichever variable happened to be listed last.
                    Variable.objects.create(
                        service=service,
                        index=0,
                        variable=variable,
                        projection=projection,
                        x_dimension=x_dimension,
                        y_dimension=y_dimension,
                        name=variable,
                        renderer=StretchedRenderer([
                            (info['min'], Color(0, 0, 0)),
                            (info['max'], Color(255, 255, 255))
                        ]),
                        full_extent=coords.bbox
                    )

                print('Added {}...'.format(name))

    for path in old_files:
        if os.path.exists(path):
            os.remove(path)

    for dataset in datasets:
        target_dir = Path(SERVICE_DIR) / (directory or '')
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(dataset, target_dir)
def mask(input, output, variable, like, netcdf3, all_touched, invert, zip):
    """
    Create a NetCDF mask from a shapefile.

    Values are equivalent to a numpy mask: 0 for unmasked areas, and 1 for masked areas.

    Template NetCDF dataset must have a valid projection defined or be inferred from dimensions (e.g., lat / long)
    """
    # NOTE(review): the docstring above is presumably shown as the command's
    # --help text, so its wording is left untouched.
    #
    # input:       path of the shapefile to rasterize
    # output:      path of the NetCDF file to create
    # variable:    name of the mask variable written to the output
    # like:        path of the template NetCDF dataset (grid and projection)
    # netcdf3:     write NETCDF3_CLASSIC / int8 instead of NETCDF4 / uint8
    # all_touched: passed through to rasterize()
    # invert:      swap the values used for features and background
    # zip:         passed as zlib= when creating the output variable

    with Dataset(like) as template_ds:
        # dict views cannot be indexed on Python 3, so materialize the names first.
        template_varnames = list(data_variables(template_ds).keys())
        if not template_varnames:
            raise click.UsageError('template dataset must contain at least one data variable')

        template_varname = template_varnames[0]
        template_variable = template_ds.variables[template_varname]
        template_crs = get_crs(template_ds, template_varname)

        if template_crs:
            template_crs = CRS.from_string(template_crs)
        elif is_geographic(template_ds, template_varname):
            template_crs = CRS({'init': 'EPSG:4326'})
        else:
            raise click.UsageError('template dataset must have a valid projection defined')

        # The last two dimensions are taken as (y, x).
        spatial_dimensions = template_variable.dimensions[-2:]
        mask_shape = template_variable.shape[-2:]

        template_y_name, template_x_name = spatial_dimensions
        coords = SpatialCoordinateVariables.from_dataset(
            template_ds,
            x_name=template_x_name,
            y_name=template_y_name,
            projection=Proj(**template_crs.to_dict())
        )

    with fiona.open(input, 'r') as shp:
        transform_required = CRS(shp.crs) != template_crs

        # Project bbox for filtering
        bbox = coords.bbox
        if transform_required:
            bbox = bbox.project(Proj(**shp.crs), edge_points=21)

        geometries = []
        for feature in shp.filter(bbox=bbox.as_list()):
            geom = feature['geometry']
            if transform_required:
                geom = transform_geom(shp.crs, template_crs, geom)
            geometries.append(geom)

    click.echo('Converting {0} features to mask'.format(len(geometries)))

    if invert:
        fill_value = 0
        default_value = 1
    else:
        fill_value = 1
        default_value = 0

    with rasterio.Env():
        # Rasterize features to 0, leaving background as 1 (swapped when inverted)
        mask_data = rasterize(
            geometries,
            out_shape=mask_shape,
            transform=coords.affine,
            all_touched=all_touched,
            fill=fill_value,
            default_value=default_value,
            dtype=numpy.uint8
        )

    nc_format = 'NETCDF3_CLASSIC' if netcdf3 else 'NETCDF4'
    dtype = 'int8' if netcdf3 else 'uint8'

    with Dataset(output, 'w', format=nc_format) as out:
        coords.add_to_dataset(out, template_x_name, template_y_name)
        out_var = out.createVariable(
            variable,
            dtype,
            dimensions=spatial_dimensions,
            zlib=zip,
            fill_value=get_fill_value(dtype)
        )
        out_var[:] = mask_data
def netcdf_to_raster(
        path_or_dataset,
        variable_name,
        outfilename,
        index=0,
        projection=None):
    """
    Exports a 2D slice from a netcdf file to a raster file.
    Only GeoTiffs are supported at this time.

    Parameters
    ----------
    path_or_dataset: path to NetCDF file or open Dataset
    variable_name: name of data variable to export from dataset
    outfilename: output filename
    index: index within 3rd dimension (in first position) or 0
    projection: pyproj.Proj object.  Automatically determined from file if possible

    Raises
    ------
    ValueError: if no projection can be determined, the output filename does
        not end in .tif, the index is out of range, or the variable does not
        have 2 or 3 dimensions
    """

    # Only close the dataset if it was opened here; a dataset passed in by the
    # caller stays open.
    if isinstance(path_or_dataset, string_types):
        dataset = Dataset(path_or_dataset)
        managed_dataset = True
    else:
        dataset = path_or_dataset
        managed_dataset = False

    try:
        projection = projection or get_crs(dataset, variable_name)
        if not projection:
            raise ValueError('Projection must be provided; '
                             'no projection information can be determined from file')

        # TODO figure out cleaner way to get affine or coords
        # The spatial dimensions are the last two: for 3D variables the extra
        # dimension is assumed to come first (see below).
        y_name, x_name = dataset.variables[variable_name].dimensions[-2:]
        coords = SpatialCoordinateVariables.from_dataset(
            dataset, x_name, y_name, projection=projection)
        affine = coords.affine

        if outfilename.lower().endswith('.tif'):
            format = 'GTiff'
        else:
            raise ValueError('Only GeoTiff outputs supported, filename must have .tif extension')

        variable = dataset.variables[variable_name]
        ndims = len(variable.shape)
        if ndims == 2:
            if index != 0:
                raise ValueError('Index out of range, must be 0')
            data = variable[:]
        elif ndims == 3:
            # Assumes that time dimension is first
            if index < 0 or index >= variable.shape[0]:
                raise ValueError('Index out of range, '
                                 'must be between 0 and {0}'.format(variable.shape[0] - 1))
            data = variable[index]
        else:
            raise ValueError(
                'Unsupported number of dimensions {0} for variable {1}, '
                'must be 2 or 3'.format(ndims, variable_name))

        array_to_raster(
            data,
            outfilename,
            format=format,
            projection=projection,
            affine=affine)
    finally:
        if managed_dataset:
            dataset.close()
def raster_to_netcdf(filename_or_raster, outfilename=None, variable_name='data', format='NETCDF4', **kwargs):
    """
    Exports the single band of a raster to a variable in a new NetCDF file.

    Parameters
    ----------
    filename_or_raster: name of file to open with rasterio, or opened rasterio raster dataset
    outfilename: name of output file.  If blank, will be same name as input with *.nc extension added
    variable_name: name of the data variable created in the output file
    format: output format for netCDF file: NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, NETCDF4
    kwargs: arguments passed to variable creation: zlib

    Raises
    ------
    ValueError: if a filename is given and the file does not exist
    NotImplementedError: if the raster does not have exactly one band

    Note: only rasters with descending y coordinates are currently supported
    """

    start = time.time()

    if isinstance(filename_or_raster, string_types):
        if not os.path.exists(filename_or_raster):
            raise ValueError('File does not exist: {0}'.format(filename_or_raster))

        src = rasterio.open(filename_or_raster)
        managed_raster = True
    else:
        src = filename_or_raster
        managed_raster = False

    try:
        if src.count != 1:
            raise NotImplementedError('ERROR: multi-band rasters not yet supported for this operation')

        prj = pyproj.Proj(**src.crs)

        outfilename = outfilename or src.name + '.nc'
        with Dataset(outfilename, 'w', format=format) as target:
            if is_latlong(prj):
                x_varname = 'longitude'
                y_varname = 'latitude'
            else:
                x_varname = 'x'
                y_varname = 'y'

            # TODO: may need to do this in blocks if source is big
            data = src.read(1, masked=True)

            coords = SpatialCoordinateVariables.from_bbox(BBox(src.bounds, prj), src.width, src.height)
            coords.add_to_dataset(target, x_varname, y_varname, **kwargs)

            out_var = target.createVariable(
                variable_name, data.dtype, dimensions=(y_varname, x_varname), **kwargs)
            out_var[:] = data
            set_crs(target, variable_name, prj, set_proj4_att=False)
    finally:
        # Close a raster opened here even when the conversion fails; a raster
        # passed in by the caller is left open.
        if managed_raster:
            src.close()

    print('Elapsed {0:.3f} seconds'.format(time.time() - start))
def netcdf_to_raster(path_or_dataset,
                     variable_name,
                     outfilename,
                     index=0,
                     projection=None):
    """
    Exports a 2D slice from a netcdf file to a raster file.
    Only GeoTiffs are supported at this time.

    Parameters
    ----------
    path_or_dataset: path to NetCDF file or open Dataset
    variable_name: name of data variable to export from dataset
    outfilename: output filename
    index: index within 3rd dimension (in first position) or 0
    projection: pyproj.Proj object.  Automatically determined from file if possible

    Raises
    ------
    ValueError: if no projection can be determined, the output filename does
        not end in .tif, the index is out of range, or the variable does not
        have 2 or 3 dimensions
    """

    # A dataset opened from a path is owned (and closed) by this function; an
    # already open dataset remains the caller's responsibility.
    opened_here = isinstance(path_or_dataset, string_types)
    dataset = Dataset(path_or_dataset) if opened_here else path_or_dataset

    try:
        projection = projection or get_crs(dataset, variable_name)
        if not projection:
            raise ValueError(
                'Projection must be provided; '
                'no projection information can be determined from file')

        # TODO figure out cleaner way to get affine or coords
        # Take the trailing two dimensions as (y, x): a third dimension, when
        # present, is assumed to be in first position.
        y_name, x_name = dataset.variables[variable_name].dimensions[-2:]
        coords = SpatialCoordinateVariables.from_dataset(dataset,
                                                         x_name,
                                                         y_name,
                                                         projection=projection)
        affine = coords.affine

        if outfilename.lower().endswith('.tif'):
            format = 'GTiff'
        else:
            raise ValueError(
                'Only GeoTiff outputs supported, filename must have .tif extension'
            )

        variable = dataset.variables[variable_name]
        ndims = len(variable.shape)
        if ndims == 2:
            if index != 0:
                raise ValueError('Index out of range, must be 0')
            data = variable[:]
        elif ndims == 3:
            # Assumes that time dimension is first
            if index < 0 or index >= variable.shape[0]:
                raise ValueError('Index out of range, '
                                 'must be between 0 and {0}'.format(
                                     variable.shape[0] - 1))
            data = variable[index]
        else:
            raise ValueError(
                'Unsupported number of dimensions {0} for variable {1}, '
                'must be 2 or 3'.format(ndims, variable_name))

        array_to_raster(data,
                        outfilename,
                        format=format,
                        projection=projection,
                        affine=affine)
    finally:
        if opened_here:
            dataset.close()
def map_eems(
        eems_file,
        # output_directory,
        scale,
        format,
        src_crs,
        resampling):
    """
    Render a NetCDF EEMS model to a web map.
    """
    # NOTE(review): the docstring above is presumably shown as the command's
    # --help text, so its wording is left untouched.
    #
    # eems_file:  EEMS model file, loaded with EEMSProgram
    # scale:      passed through to render_image()
    # format:     image format; also used as the image file extension
    # src_crs:    fallback source CRS when none can be read from the data
    # resampling: name of a rasterio Resampling member
    #
    # Images, legends and index.html are written to a fresh temporary
    # directory, and index.html is opened in the web browser.

    from EEMSBasePackage import EEMSCmd, EEMSProgram

    model = EEMSProgram(eems_file)

    # For each data producing command, store the netcdf file that contains it
    file_vars = dict()
    raw_variables = set()
    for cmd in model.orderedCmds:  # This is bottom up, may want to invert
        filename = None
        variable = None
        if cmd.HasResultName():
            filename = cmd.GetParam('OutFileName')
            variable = cmd.GetResultName()
        elif cmd.IsReadCmd():
            filename = cmd.GetParam('OutFileName')
            variable = cmd.GetParam('NewFieldName')
            raw_variables.add(variable)

        if filename and variable:
            file_vars.setdefault(filename, []).append(variable)

    # A real list: dict views cannot be indexed on Python 3 (see filenames[0] below).
    filenames = list(file_vars)
    if not filenames:
        raise click.ClickException(
            'No data variables were found in EEMS model: {0}'.format(eems_file))

    for filename in filenames:
        if not os.path.exists(filename):
            raise click.ClickException(
                'Could not find data file from EEMS model: {0}'.format(filename))

    dst_crs = 'EPSG:3857'

    output_directory = tempfile.mkdtemp()
    click.echo('Using temp directory: {0}'.format(output_directory))
    # if not os.path.exists(output_directory):
    #     os.makedirs(output_directory)

    # Since fuzzy renderer is hardcoded, we can output it now
    fuzzy_renderer = palette_to_stretched_renderer(DEFAULT_PALETTES['fuzzy'], '1,-1')
    fuzzy_renderer.get_legend(image_height=150)[0].to_image().save(
        os.path.join(output_directory, 'fuzzy_legend.png'))

    # The first variable of the first file defines the grid every other
    # variable has to match.
    template_filename = filenames[0]
    template_var = file_vars[template_filename][0]
    with Dataset(template_filename) as ds:
        var_obj = ds.variables[template_var]
        dimensions = var_obj.dimensions
        shape = var_obj.shape
        num_dimensions = len(shape)
        if num_dimensions != 2:
            raise click.ClickException(
                'Only 2 dimensions are allowed on data variables for now')

        ds_crs = get_crs(ds, template_var)
        if not ds_crs and is_geographic(ds, template_var):
            ds_crs = 'EPSG:4326'  # Assume all geographic data is WGS84

        # The CRS found in the dataset wins over the src_crs argument.
        src_crs = CRS.from_string(ds_crs) if ds_crs else CRS(
            {'init': src_crs}) if src_crs else None

        # get transforms, assume last 2 dimensions on variable are spatial in row, col order
        y_dim, x_dim = dimensions[-2:]
        coords = SpatialCoordinateVariables.from_dataset(
            ds, x_dim, y_dim,
            projection=Proj(src_crs) if src_crs else None)
        #
        #     if mask is not None and not mask.shape == shape[-2:]:
        #         # Will likely break before this if collecting statistics
        #         raise click.BadParameter(
        #             'mask variable shape does not match shape of input spatial dimensions',
        #             param='--mask', param_hint='--mask'
        #         )
        #

        if not src_crs:
            raise click.BadParameter('must provide src_crs to reproject',
                                     param='--src-crs',
                                     param_hint='--src-crs')

        dst_crs = CRS.from_string(dst_crs)

        src_height, src_width = coords.shape
        dst_transform, dst_width, dst_height = calculate_default_transform(
            src_crs, dst_crs, src_width, src_height,
            *coords.bbox.as_list())

        reproject_kwargs = {
            'src_crs': src_crs,
            'src_transform': coords.affine,
            'dst_crs': dst_crs,
            'dst_transform': dst_transform,
            'resampling': getattr(Resampling, resampling),
            'dst_shape': (dst_height, dst_width)
        }

        # NOTE(review): src_crs was already validated above, so this
        # check looks unreachable — kept as a safeguard.
        if not (dst_crs or src_crs):
            raise click.BadParameter(
                'must provide valid src_crs to get interactive map',
                param='--src-crs',
                param_hint='--src-crs')

        leaflet_anchors = get_leaflet_anchors(
            BBox.from_affine(dst_transform,
                             dst_width,
                             dst_height,
                             projection=Proj(dst_crs) if dst_crs else None))

    # Maps variable name -> image file name (relative to the output directory)
    layers = {}
    for filename in filenames:
        with Dataset(filename) as ds:
            click.echo('Processing dataset {0}'.format(filename))

            for variable in file_vars[filename]:
                click.echo('Processing variable {0}'.format(variable))

                if variable not in ds.variables:
                    raise click.ClickException(
                        'variable {0} was not found in file: {1}'.format(
                            variable, filename))

                var_obj = ds.variables[variable]
                if var_obj.dimensions != dimensions:
                    raise click.ClickException(
                        'All datasets must have the same dimensions for {0}'.
                        format(variable))

                data = var_obj[:]
                # if mask is not None:
                #     data = numpy.ma.masked_array(data, mask=mask)

                if variable in raw_variables:
                    # Raw inputs get their own legend, stretched over the data range
                    palette = DEFAULT_PALETTES['raw']
                    palette_stretch = '{0},{1}'.format(data.max(), data.min())

                    renderer = palette_to_stretched_renderer(palette, palette_stretch)
                    renderer.get_legend(
                        image_height=150, max_precision=2)[0].to_image().save(
                            os.path.join(output_directory,
                                         '{0}_legend.png'.format(variable)))
                else:
                    renderer = fuzzy_renderer

                image_filename = os.path.join(
                    output_directory, '{0}.{1}'.format(variable, format))
                data = warp_array(data, **reproject_kwargs)
                render_image(renderer, data, image_filename, scale=scale, format=format)

                local_filename = os.path.split(image_filename)[1]
                layers[variable] = local_filename

    index_html = os.path.join(output_directory, 'index.html')
    with open(index_html, 'w') as out:
        template = Environment(
            loader=PackageLoader('trefoil.cli')).get_template('eems_map.html')
        out.write(
            template.render(layers=json.dumps(layers),
                            bounds=str(leaflet_anchors),
                            tree=[[cmd, depth] for (cmd, depth) in model.GetCmdTree()],
                            raw_variables=list(raw_variables)))

    webbrowser.open(index_html)