def ecwmf_surface_pressure(input_path, lonlat, time):
    """
    Retrieve a pixel value from the ECMWF Surface Pressure collection.
    Divides the result by 100 (Pa -> hPa) before returning.
    """
    product = DatasetName.SURFACE_PRESSURE.value.lower()
    search = pjoin(input_path, DatasetName.ECMWF_PATH_FMT.value)
    files = glob.glob(search.format(product=product, year=time.year))
    data = None
    required_ymd = datetime.datetime(time.year, time.month, time.day)
    for f in files:
        url = urlparse(f, scheme='file').geturl()
        ymd = splitext(basename(f))[0].split('_')[1]
        ancillary_ymd = datetime.datetime.strptime(ymd, '%Y-%m-%d')
        if ancillary_ymd == required_ymd:
            data = get_pixel(f, lonlat) / 100.0

            metadata = {'data_source': 'ECWMF Surface Pressure',
                        'url': url,
                        'query_date': time}

            # ancillary metadata tracking
            md = extract_ancillary_metadata(f)
            for key in md:
                metadata[key] = md[key]

            return data, metadata

    if data is None:
        raise AncillaryError("No ECWMF Surface Pressure data")
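# A minimal sketch (not part of the production path) of the filename
# convention the date matching in the ecwmf_* retrieval functions relies on:
# the second '_'-delimited token of the basename is an ISO date. The example
# path and function name are hypothetical, for illustration only.
def _example_parse_ancillary_date(path='/data/ECMWF/surface-pressure_2018-03-21.tif'):
    from os.path import basename, splitext
    import datetime
    ymd = splitext(basename(path))[0].split('_')[1]  # -> '2018-03-21'
    return datetime.datetime.strptime(ymd, '%Y-%m-%d')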
def ecwmf_water_vapour(input_path, lonlat, time):
    """
    Retrieve a pixel value from the ECMWF Total Column Water Vapour
    collection.
    """
    product = DatasetName.WATER_VAPOUR.value.lower()
    search = pjoin(input_path, DatasetName.ECMWF_PATH_FMT.value)
    files = glob.glob(search.format(product=product, year=time.year))
    data = None
    required_ymd = datetime.datetime(time.year, time.month, time.day)
    for f in files:
        url = urlparse(f, scheme='file').geturl()
        ymd = splitext(basename(f))[0].split('_')[1]
        ancillary_ymd = datetime.datetime.strptime(ymd, '%Y-%m-%d')
        if ancillary_ymd == required_ymd:
            data = get_pixel(f, lonlat)

            metadata = {'data_source': 'ECWMF Total Column Water Vapour',
                        'url': url,
                        'query_date': time}

            # ancillary metadata tracking
            md = extract_ancillary_metadata(f)
            for key in md:
                metadata[key] = md[key]

            return data, metadata

    if data is None:
        raise AncillaryError("No ECWMF Total Column Water Vapour data")
def get_elevation_data(lonlat, dem_path):
    """
    Get elevation data for a scene.

    :param lonlat:
        The longitude, latitude of the scene centre.
    :type lonlat:
        float (2-tuple)

    :param dem_path:
        The directory in which the DEM can be found.
    :type dem_path:
        str
    """
    datafile = pjoin(dem_path, "DEM_one_deg.tif")
    url = urlparse(datafile, scheme='file').geturl()

    try:
        data = get_pixel(datafile, lonlat) * 0.001  # metres -> kilometres
    except IndexError:
        raise AncillaryError("No Elevation data")

    metadata = {'data_source': 'Elevation',
                'url': url}

    # ancillary metadata tracking
    md = extract_ancillary_metadata(datafile)
    for key in md:
        metadata[key] = md[key]

    return data, metadata
def get_water_vapour(acquisition, water_vapour_dict, scale_factor=0.1):
    """
    Retrieve the water vapour value for an `acquisition`: either a
    user supplied value, or a value read from the water vapour
    ancillary data at the supplied path.
    """
    dt = acquisition.acquisition_datetime
    geobox = acquisition.gridded_geo_box()

    year = dt.strftime('%Y')
    filename = "pr_wtr.eatm.{year}.tif".format(year=year)

    if 'user' in water_vapour_dict:
        metadata = {'data_source': 'User defined value'}
        return water_vapour_dict['user'], metadata
    else:
        water_vapour_path = water_vapour_dict['pathname']

    datafile = pjoin(water_vapour_path, filename)
    url = urlparse(datafile, scheme='file').geturl()

    # calculate the water vapour band number based on the datetime
    doy = dt.timetuple().tm_yday
    hour = dt.timetuple().tm_hour
    band = (int(doy) - 1) * 4 + int((hour + 3) / 6)

    # Check for boundary condition: 1 Jan, 0-3 hours
    if band == 0 and doy == 1:
        band = 1

    # Get the number of bands
    with rasterio.open(datafile) as src:
        n_bands = src.count

    # Enable NBAR Near Real Time (NRT) processing: if the requested band
    # lies beyond the bands present in a still-accumulating file, fall
    # back to the latest available day, provided it is within a week
    if band > (n_bands + 1):
        rasterdoy = (((n_bands) - (int((hour + 3) / 6))) / 4) + 1
        if (doy - rasterdoy) < 7:
            band = (int(rasterdoy) - 1) * 4 + int((hour + 3) / 6)

    try:
        data = get_pixel(datafile, geobox.centre_lonlat, band=band)
    except IndexError:
        raise AncillaryError("No Water Vapour data")

    data = data * scale_factor

    metadata = {'data_source': 'Water Vapour',
                'url': url,
                'query_date': dt}

    # ancillary metadata tracking
    md = extract_ancillary_metadata(datafile)
    for key in md:
        metadata[key] = md[key]

    return data, metadata
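# A self-contained sketch of the band-index arithmetic in get_water_vapour,
# assuming the pr_wtr.eatm.{year}.tif stacks hold four 6-hourly layers per
# day (00, 06, 12, 18 UTC), so band 1 corresponds to 1 January 00:00. The
# function name is illustrative only.
def _example_water_vapour_band(dt):
    doy = dt.timetuple().tm_yday
    hour = dt.timetuple().tm_hour
    # (hour + 3) // 6 rounds to the nearest 6-hourly slot
    band = (doy - 1) * 4 + (hour + 3) // 6
    # 1 Jan 00:00-02:59 would otherwise yield band 0; clamp to band 1
    return max(band, 1)

# e.g. _example_water_vapour_band(datetime.datetime(2018, 1, 2, 13)) -> 6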
def ecwmf_geo_potential(input_path, lonlat, time):
    """
    Retrieve a pixel value from the ECMWF Geo-Potential collection
    across 37 height pressure levels, for a given longitude,
    latitude and time.
    Converts the geo-potential to geo-potential height in KM, and
    reverses the order of the elements (1000 -> 1 mb, rather than
    1 -> 1000 mb) before returning.
    """
    product = DatasetName.GEOPOTENTIAL.value.lower()
    search = pjoin(input_path, DatasetName.ECMWF_PATH_FMT.value)
    files = glob.glob(search.format(product=product, year=time.year))
    data = None
    required_ymd = datetime.datetime(time.year, time.month, time.day)
    for f in files:
        url = urlparse(f, scheme='file').geturl()
        ymd = splitext(basename(f))[0].split('_')[1]
        ancillary_ymd = datetime.datetime.strptime(ymd, '%Y-%m-%d')
        if ancillary_ymd == required_ymd:
            bands = list(range(1, 38))
            data = get_pixel(f, lonlat, bands)[::-1]
            scaled_data = data / 9.80665 / 1000.0

            metadata = {'data_source': 'ECWMF Geo-Potential',
                        'url': url,
                        'query_date': time}

            # ancillary metadata tracking
            md = extract_ancillary_metadata(f)
            for key in md:
                metadata[key] = md[key]

            # internal file metadata (and reverse the ordering)
            df = read_metadata_tags(f, bands).iloc[::-1]
            df.insert(0, 'GeoPotential', data)
            df.insert(1, 'GeoPotential_Height', scaled_data)

            return df, metadata

    if data is None:
        raise AncillaryError("No ECWMF Geo-Potential profile data")
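# A worked sketch of the scaling performed above: ECMWF geo-potential is in
# m^2/s^2; dividing by standard gravity g0 = 9.80665 m/s^2 yields
# geo-potential height in metres, and a further division by 1000 gives km.
def _example_geopotential_to_height_km(geo_potential):
    g0 = 9.80665  # standard gravity, m/s^2
    return geo_potential / g0 / 1000.0

# e.g. a geo-potential of 98066.5 m^2/s^2 corresponds to a height of 10.0 km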
def ecwmf_relative_humidity(input_path, lonlat, time):
    """
    Retrieve a pixel value from the ECMWF Relative Humidity collection
    across 37 height pressure levels, for a given longitude,
    latitude and time.
    Reverses the order of the elements (1000 -> 1 mb, rather than
    1 -> 1000 mb) before returning.
    """
    product = DatasetName.RELATIVE_HUMIDITY.value.lower()
    search = pjoin(input_path, DatasetName.ECMWF_PATH_FMT.value)
    files = glob.glob(search.format(product=product, year=time.year))
    data = None
    required_ymd = datetime.datetime(time.year, time.month, time.day)
    for f in files:
        url = urlparse(f, scheme='file').geturl()
        ymd = splitext(basename(f))[0].split('_')[1]
        ancillary_ymd = datetime.datetime.strptime(ymd, '%Y-%m-%d')
        if ancillary_ymd == required_ymd:
            bands = list(range(1, 38))
            data = get_pixel(f, lonlat, bands)[::-1]

            metadata = {'data_source': 'ECWMF Relative Humidity',
                        'url': url,
                        'query_date': time}

            # file level metadata
            md = extract_ancillary_metadata(f)
            for key in md:
                metadata[key] = md[key]

            # internal file metadata (and reverse the ordering)
            df = read_metadata_tags(f, bands).iloc[::-1]
            df.insert(0, 'Relative_Humidity', data)

            return df, metadata

    if data is None:
        raise AncillaryError("No ECWMF Relative Humidity profile data")
def ecwmf_elevation(datafile, lonlat):
    """
    Retrieve a pixel from the ECMWF invariant geo-potential dataset.
    Converts to geo-potential height in KM, and adds 2 metres
    (0.002 km) to the result before returning.
    """
    try:
        data = get_pixel(datafile, lonlat) / 9.80665 / 1000.0 + 0.002
    except IndexError:
        raise AncillaryError("No Invariant Geo-Potential data")

    url = urlparse(datafile, scheme='file').geturl()

    metadata = {'data_source': 'ECWMF Invariant Geo-Potential',
                'url': url}

    # ancillary metadata tracking
    md = extract_ancillary_metadata(datafile)
    for key in md:
        metadata[key] = md[key]

    return data, metadata
def get_ozone_data(ozone_path, lonlat, time):
    """
    Get ozone data for a scene. `lonlat` should be the (x, y) for the
    centre of the scene.
    """
    filename = time.strftime('%b').lower() + '.tif'
    datafile = pjoin(ozone_path, filename)
    url = urlparse(datafile, scheme='file').geturl()

    try:
        data = get_pixel(datafile, lonlat)
    except IndexError:
        raise AncillaryError("No Ozone data")

    metadata = {'data_source': 'Ozone',
                'url': url,
                'query_date': time}

    # ancillary metadata tracking
    md = extract_ancillary_metadata(datafile)
    for key in md:
        metadata[key] = md[key]

    return data, metadata
def get_brdf_data(acquisition, brdf_primary_path, brdf_secondary_path,
                  compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Calculates the mean BRDF value for the given acquisition, for each
    BRDF parameter ['geo', 'iso', 'vol'] that covers the acquisition's
    extents.

    :param acquisition:
        An instance of an acquisitions object.

    :param brdf_primary_path:
        A string containing the full file system path to your directory
        containing the source BRDF files. The BRDF directories are
        assumed to follow the yyyy.mm.dd naming convention.

    :param brdf_secondary_path:
        A string containing the full file system path to your directory
        containing the Jupp-Li backup BRDF data. To be used for
        pre-MODIS and potentially post-MODIS acquisitions.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        A `dict` with the keys:

            * BrdfParameters.ISO
            * BrdfParameters.VOL
            * BrdfParameters.GEO

        Values for each BRDF Parameter are accessed via the key named
        `value`.

    :notes:
        The keywords compression and filter_opts aren't used as we no
        longer save the BRDF imagery. However, we may need to store
        tables in future, therefore they can remain until we know for
        sure they'll never be used.
    """
    def find_file(files, brdf_wl, parameter):
        """Find file with a specific name."""
        for f in files:
            if f.find(brdf_wl) != -1 and f.find(parameter) != -1:
                return f
        return None

    # Compute the geobox
    geobox = acquisition.gridded_geo_box()

    # Get the date of acquisition
    dt = acquisition.acquisition_datetime.date()

    # Compare the scene date and MODIS BRDF start date to select the
    # BRDF data root directory.
    # Scene dates outside the range of the CSIRO mosaic data
    # should use the pre-MODIS, Jupp-Li BRDF.
    brdf_dir_list = sorted(os.listdir(brdf_primary_path))

    try:
        brdf_dir_range = [brdf_dir_list[0], brdf_dir_list[-1]]
        brdf_range = [datetime.date(*[int(x) for x in y.split('.')])
                      for y in brdf_dir_range]

        use_jupp_li_brdf = (dt < brdf_range[0] or dt > brdf_range[1])
    except IndexError:
        use_jupp_li_brdf = True  # use Jupp-Li if no primary data available

    if use_jupp_li_brdf:
        brdf_base_dir = brdf_secondary_path
        brdf_dirs = get_brdf_dirs_pre_modis(brdf_base_dir, dt)
    else:
        brdf_base_dir = brdf_primary_path
        brdf_dirs = get_brdf_dirs_modis(brdf_base_dir, dt)

    # The following hdflist code was resurrected from the old SVN repo. JS
    # get all HDF files in the input dir
    db_dir = pjoin(brdf_base_dir, brdf_dirs)
    hdflist = []
    hdfhome = None
    for (hdfhome, _, filelist) in os.walk(db_dir):
        for f in filelist:
            if f.endswith(".hdf.gz") or f.endswith(".hdf"):
                hdflist.append(f)

    results = {}
    for param in BrdfParameters:
        hdf_fname = find_file(hdflist, acquisition.brdf_wavelength,
                              param.name.lower())

        hdf_path = pjoin(hdfhome, hdf_fname)

        # Test if the file exists and has correct permissions
        try:
            with open(hdf_path, 'rb'):
                pass
        except IOError:
            print("Unable to open file %s" % hdf_path)

        with tempfile.TemporaryDirectory() as tmpdir:
            # gunzip into the temporary directory if we need to
            if hdf_path.endswith(".hdf.gz"):
                hdf_file = pjoin(tmpdir,
                                 re.sub(".hdf.gz", ".hdf",
                                        basename(hdf_path)))
                cmd = "gunzip -c %s > %s" % (hdf_path, hdf_file)
                subprocess.check_call(cmd, shell=True)
            else:
                hdf_file = hdf_path

            # Load the file
            brdf_object = BRDFLoader(hdf_file, ul=geobox.ul_lonlat,
                                     lr=geobox.lr_lonlat)

            # guard against ROIs that don't intersect
            if not brdf_object.intersects:
                msg = "ROI is outside the BRDF extents!"
                log.error(msg)
                raise Exception(msg)

            # calculate the mean value
            brdf_mean_value = brdf_object.mean_data_value()

        # Add the BRDF filename and mean value to the results dict
        url = urlparse(hdf_path, scheme='file').geturl()
        res = {'data_source': 'BRDF',
               'url': url,
               'value': brdf_mean_value}

        # ancillary metadata tracking
        md = extract_ancillary_metadata(hdf_path)
        for key in md:
            res[key] = md[key]

        results[param] = res

    # check for no BRDF, i.e. (iso, vol, geo) == (0, 0, 0), and convert
    # to (1, 0, 0), stripping any file level metadata in the process
    if all([v['value'] == 0 for _, v in results.items()]):
        results[BrdfParameters.ISO] = {'value': 1.0}
        results[BrdfParameters.VOL] = {'value': 0.0}
        results[BrdfParameters.GEO] = {'value': 0.0}

    # add very basic BRDF description metadata and the roi polygon
    for param in BrdfParameters:
        results[param]['extents'] = wkt.dumps(brdf_object.roi_polygon)

    return results
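# A hedged usage sketch for get_brdf_data; the acquisition object and both
# directory paths are placeholders, not real inputs.
#
#   results = get_brdf_data(acq,
#                           '/path/to/modis_brdf',    # yyyy.mm.dd subdirs
#                           '/path/to/jupp_li_brdf')  # pre-MODIS fallback
#   iso = results[BrdfParameters.ISO]['value']
#   vol = results[BrdfParameters.VOL]['value']
#   geo = results[BrdfParameters.GEO]['value']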
def get_aerosol_data(acquisition, aerosol_dict):
    """
    Extract the aerosol value for an acquisition.
    This version (2) retrieves the data from an HDF5 file, and provides
    more control over how the data is selected geometrically, as well
    as better control over timedeltas.
    """
    dt = acquisition.acquisition_datetime
    geobox = acquisition.gridded_geo_box()
    roi_poly = Polygon([geobox.ul_lonlat, geobox.ur_lonlat,
                        geobox.lr_lonlat, geobox.ll_lonlat])

    descr = ['AATSR_PIX', 'AATSR_CMP_YEAR_MONTH', 'AATSR_CMP_MONTH']
    names = ['ATSR_LF_%Y%m', 'aot_mean_%b_%Y_All_Aerosols',
             'aot_mean_%b_All_Aerosols']
    exts = ['/pix', '/cmp', '/cmp']
    pathnames = [ppjoin(ext, dt.strftime(n)) for ext, n in zip(exts, names)]

    # temporary until we sort out a better default mechanism
    # how do we want to support default values, whilst still supporting
    # provenance
    if 'user' in aerosol_dict:
        metadata = {'data_source': 'User defined value'}
        return aerosol_dict['user'], metadata
    else:
        aerosol_fname = aerosol_dict['pathname']

    fid = h5py.File(aerosol_fname, 'r')
    url = urlparse(aerosol_fname, scheme='file').geturl()

    delta_tolerance = datetime.timedelta(days=0.5)

    data = None
    for pathname, description in zip(pathnames, descr):
        if pathname in fid:
            df = read_h5_table(fid, pathname)
            aerosol_poly = wkt.loads(fid[pathname].attrs['extents'])

            if aerosol_poly.intersects(roi_poly):
                if description == 'AATSR_PIX':
                    abs_diff = (df['timestamp'] - dt).abs()
                    df = df[abs_diff < delta_tolerance]
                    df.reset_index(inplace=True, drop=True)
                    if df.shape[0] == 0:
                        continue

                intersection = aerosol_poly.intersection(roi_poly)
                pts = GeoSeries([Point(x, y) for x, y in
                                 zip(df['lon'], df['lat'])])
                idx = pts.within(intersection)
                data = df[idx]['aerosol'].mean()

                if numpy.isfinite(data):
                    metadata = {'data_source': description,
                                'dataset_pathname': pathname,
                                'query_date': dt,
                                'url': url,
                                'extents': wkt.dumps(intersection)}

                    # ancillary metadata tracking
                    md = extract_ancillary_metadata(aerosol_fname)
                    for key in md:
                        metadata[key] = md[key]

                    fid.close()
                    return data, metadata

    # now we officially support a default value of 0.05, which should
    # make the following redundant ...

    # default aerosol value
    # assumes we are only processing Australia, in which case it should
    # be a coastal scene
    data = 0.06
    metadata = {'data_source': 'Default value used; Assumed a coastal scene'}

    fid.close()
    return data, metadata
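# A minimal, self-contained sketch of the AATSR_PIX temporal filter above:
# keep only rows whose timestamp falls within +/- half a day of the
# acquisition datetime. The sample values are fabricated for illustration.
def _example_temporal_filter():
    import pandas
    dt = datetime.datetime(2018, 3, 21, 0, 30)
    df = pandas.DataFrame({
        'timestamp': [dt - datetime.timedelta(hours=2),   # kept
                      dt + datetime.timedelta(days=2)],   # filtered out
        'aerosol': [0.04, 0.09],
    })
    abs_diff = (df['timestamp'] - dt).abs()
    df = df[abs_diff < datetime.timedelta(days=0.5)]
    return df.reset_index(drop=True)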
def calc_land_sea_mask(geo_box,
                       ancillary_path='/g/data/v10/eoancillarydata/Land_Sea_Rasters'):
    """
    Creates a Land/Sea mask.

    :param geo_box:
        An instance of GriddedGeoBox defining the region for which the
        land/sea mask is required.

            * WARNING: geo_box.crs must be UTM!!!

    :param ancillary_path:
        The path to the directory containing the land/sea data files.

    :return:
        A tuple of (mask, metadata), where mask is a 2D Numpy Boolean
        array; True = Land, False = Sea.

    :note:
        The function does not currently support reprojections. The
        GriddedGeoBox must have CRS and pixel size matching the
        ancillary data GeoTiffs.

    :TODO:
        Support reprojection to any arbitrary GriddedGeoBox.
    """
    def img2map(geoTransform, pixel):
        """
        Converts a pixel (image) co-ordinate into a map co-ordinate.

        :param geoTransform:
            The image co-ordinate information (upper left coords,
            offset and pixel sizes).

        :param pixel:
            A tuple containing the y and x image co-ordinates.

        :return:
            A tuple containing the x and y map co-ordinates.
        """
        mapx = pixel[1] * geoTransform[1] + geoTransform[0]
        mapy = geoTransform[3] - (pixel[0] * (numpy.abs(geoTransform[5])))
        return (mapx, mapy)

    def map2img(geoTransform, location):
        """
        Converts a map co-ordinate into a pixel (image) co-ordinate.

        :param geoTransform:
            The image co-ordinate information (upper left coords,
            offset and pixel sizes).

        :param location:
            A tuple containing the x and y map co-ordinates.

        :return:
            A tuple containing the y and x image co-ordinates.
        """
        imgx = int(numpy.round((location[0] - geoTransform[0]) /
                               geoTransform[1]))
        imgy = int(numpy.round((geoTransform[3] - location[1]) /
                               numpy.abs(geoTransform[5])))
        return (imgy, imgx)

    # get Land/Sea data file for this bounding box
    utm_zone = geo_box.crs.GetUTMZone()
    rasfile = os.path.join(ancillary_path, 'WORLDzone%02d.tif' % abs(utm_zone))
    assert os.path.exists(rasfile), \
        'ERROR: Raster File Not Found (%s)' % rasfile

    md = extract_ancillary_metadata(rasfile)
    md['data_source'] = 'Rasterised Land/Sea Mask'
    md['data_file'] = rasfile
    metadata = {'land_sea_mask': md}

    geoTransform = geo_box.transform.to_gdal()
    if geoTransform is None:
        raise Exception('Image geotransformation info is needed')

    dims = geo_box.shape

    lsobj = gdal.Open(rasfile, gdal.gdalconst.GA_ReadOnly)
    ls_geoT = lsobj.GetGeoTransform()

    # Convert the image's image co-ords into map co-ords
    mUL = img2map(geoTransform=geoTransform, pixel=(0, 0))
    mLR = img2map(geoTransform=geoTransform, pixel=(dims[0], dims[1]))

    # Convert the map co-ords into the rasfile image co-ords
    iUL = map2img(geoTransform=ls_geoT, location=mUL)
    iLR = map2img(geoTransform=ls_geoT, location=mLR)

    xoff = iUL[1]
    yoff = iUL[0]
    xsize = iLR[1] - xoff
    ysize = iLR[0] - yoff

    # Read in the land/sea array
    ls_arr = lsobj.ReadAsArray(xoff, yoff, xsize, ysize)

    return (ls_arr.astype('bool'), metadata)
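# A small sketch checking that img2map and map2img above are inverses for
# pixel-aligned inputs, using a hypothetical 25 m UTM geotransform
# (GDAL ordering: ulx, xres, xskew, uly, yskew, yres).
def _example_round_trip(pixel=(50, 100)):
    geo_t = (600000.0, 25.0, 0.0, 7000000.0, 0.0, -25.0)
    # img2map: pixel (y, x) -> map (x, y)
    mapx = pixel[1] * geo_t[1] + geo_t[0]
    mapy = geo_t[3] - pixel[0] * abs(geo_t[5])
    # map2img: map (x, y) -> pixel (y, x)
    imgx = int(round((mapx - geo_t[0]) / geo_t[1]))
    imgy = int(round((geo_t[3] - mapy) / abs(geo_t[5])))
    assert (imgy, imgx) == pixel
    return (imgy, imgx)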