def ecwmf_surface_pressure(input_path, lonlat, time):
    """
    Retrieve a pixel value from the ECMWF Surface Pressure collection.
    Divides the result by 100 (Pa -> hPa) before returning.
    """
    product = DatasetName.SURFACE_PRESSURE.value.lower()
    search = pjoin(input_path, DatasetName.ECMWF_PATH_FMT.value)
    files = glob.glob(search.format(product=product, year=time.year))
    data = None
    required_ymd = datetime.datetime(time.year, time.month, time.day)
    for f in files:
        url = urlparse(f, scheme='file').geturl()
        ymd = splitext(basename(f))[0].split('_')[1]
        ancillary_ymd = datetime.datetime.strptime(ymd, '%Y-%m-%d')
        if ancillary_ymd == required_ymd:
            data = get_pixel(f, lonlat) / 100.0

            metadata = {'data_source': 'ECWMF Surface Pressure',
                        'url': url,
                        'query_date': time}

            # ancillary metadata tracking
            md = extract_ancillary_metadata(f)
            for key in md:
                metadata[key] = md[key]

            return data, metadata

    if data is None:
        raise AncillaryError("No ECWMF Surface Pressure data")
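# A minimal sketch (not part of the production path) of the filename
# convention the date matching in the ecwmf_* retrieval functions relies on:
# the second '_'-delimited token of the basename is an ISO date. The example
# path and function name are hypothetical, for illustration only.
def _example_parse_ancillary_date(path='/data/ECMWF/surface-pressure_2018-03-21.tif'):
    from os.path import basename, splitext
    import datetime
    ymd = splitext(basename(path))[0].split('_')[1]  # -> '2018-03-21'
    return datetime.datetime.strptime(ymd, '%Y-%m-%d')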
def ecwmf_water_vapour(input_path, lonlat, time):
    """
    Retrieve a pixel value from the ECMWF Total Column Water Vapour
    collection.
    """
    product = DatasetName.WATER_VAPOUR.value.lower()
    search = pjoin(input_path, DatasetName.ECMWF_PATH_FMT.value)
    files = glob.glob(search.format(product=product, year=time.year))
    data = None
    required_ymd = datetime.datetime(time.year, time.month, time.day)
    for f in files:
        url = urlparse(f, scheme='file').geturl()
        ymd = splitext(basename(f))[0].split('_')[1]
        ancillary_ymd = datetime.datetime.strptime(ymd, '%Y-%m-%d')
        if ancillary_ymd == required_ymd:
            data = get_pixel(f, lonlat)

            metadata = {'data_source': 'ECWMF Total Column Water Vapour',
                        'url': url,
                        'query_date': time}

            # ancillary metadata tracking
            md = extract_ancillary_metadata(f)
            for key in md:
                metadata[key] = md[key]

            return data, metadata

    if data is None:
        raise AncillaryError("No ECWMF Total Column Water Vapour data")
def get_elevation_data(lonlat, dem_path):
    """
    Get elevation data for a scene.

    :param lonlat:
        The longitude, latitude of the scene centre.
    :type lonlat:
        float (2-tuple)

    :param dem_path:
        The directory in which the DEM can be found.
    :type dem_path:
        str
    """
    datafile = pjoin(dem_path, "DEM_one_deg.tif")
    url = urlparse(datafile, scheme='file').geturl()

    try:
        data = get_pixel(datafile, lonlat) * 0.001  # metres -> kilometres
    except IndexError:
        raise AncillaryError("No Elevation data")

    metadata = {'data_source': 'Elevation',
                'url': url}

    # ancillary metadata tracking
    md = extract_ancillary_metadata(datafile)
    for key in md:
        metadata[key] = md[key]

    return data, metadata
def get_water_vapour(acquisition, water_vapour_dict, scale_factor=0.1):
    """
    Retrieve the water vapour value for an `acquisition`: either a
    user supplied value, or a value read from the water vapour
    ancillary data at the supplied path.
    """
    dt = acquisition.acquisition_datetime
    geobox = acquisition.gridded_geo_box()

    year = dt.strftime('%Y')
    filename = "pr_wtr.eatm.{year}.tif".format(year=year)

    if 'user' in water_vapour_dict:
        metadata = {'data_source': 'User defined value'}
        return water_vapour_dict['user'], metadata
    else:
        water_vapour_path = water_vapour_dict['pathname']

    datafile = pjoin(water_vapour_path, filename)
    url = urlparse(datafile, scheme='file').geturl()

    # calculate the water vapour band number based on the datetime
    doy = dt.timetuple().tm_yday
    hour = dt.timetuple().tm_hour
    band = (int(doy) - 1) * 4 + int((hour + 3) / 6)

    # Check for boundary condition: 1 Jan, 0-3 hours
    if band == 0 and doy == 1:
        band = 1

    # Get the number of bands
    with rasterio.open(datafile) as src:
        n_bands = src.count

    # Enable NBAR Near Real Time (NRT) processing: if the requested band
    # lies beyond the bands present in a still-accumulating file, fall
    # back to the latest available day, provided it is within a week
    if band > (n_bands + 1):
        rasterdoy = (((n_bands) - (int((hour + 3) / 6))) / 4) + 1
        if (doy - rasterdoy) < 7:
            band = (int(rasterdoy) - 1) * 4 + int((hour + 3) / 6)

    try:
        data = get_pixel(datafile, geobox.centre_lonlat, band=band)
    except IndexError:
        raise AncillaryError("No Water Vapour data")

    data = data * scale_factor

    metadata = {'data_source': 'Water Vapour',
                'url': url,
                'query_date': dt}

    # ancillary metadata tracking
    md = extract_ancillary_metadata(datafile)
    for key in md:
        metadata[key] = md[key]

    return data, metadata
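# A self-contained sketch of the band-index arithmetic in get_water_vapour,
# assuming the pr_wtr.eatm.{year}.tif stacks hold four 6-hourly layers per
# day (00, 06, 12, 18 UTC), so band 1 corresponds to 1 January 00:00. The
# function name is illustrative only.
def _example_water_vapour_band(dt):
    doy = dt.timetuple().tm_yday
    hour = dt.timetuple().tm_hour
    # (hour + 3) // 6 rounds to the nearest 6-hourly slot
    band = (doy - 1) * 4 + (hour + 3) // 6
    # 1 Jan 00:00-02:59 would otherwise yield band 0; clamp to band 1
    return max(band, 1)

# e.g. _example_water_vapour_band(datetime.datetime(2018, 1, 2, 13)) -> 6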
def ecwmf_geo_potential(input_path, lonlat, time):
    """
    Retrieve a pixel value from the ECMWF Geo-Potential collection
    across 37 height pressure levels, for a given longitude,
    latitude and time.
    Converts the geo-potential to geo-potential height in KM, and
    reverses the order of the elements (1000 -> 1 mb, rather than
    1 -> 1000 mb) before returning.
    """
    product = DatasetName.GEOPOTENTIAL.value.lower()
    search = pjoin(input_path, DatasetName.ECMWF_PATH_FMT.value)
    files = glob.glob(search.format(product=product, year=time.year))
    data = None
    required_ymd = datetime.datetime(time.year, time.month, time.day)
    for f in files:
        url = urlparse(f, scheme='file').geturl()
        ymd = splitext(basename(f))[0].split('_')[1]
        ancillary_ymd = datetime.datetime.strptime(ymd, '%Y-%m-%d')
        if ancillary_ymd == required_ymd:
            bands = list(range(1, 38))
            data = get_pixel(f, lonlat, bands)[::-1]
            scaled_data = data / 9.80665 / 1000.0

            metadata = {'data_source': 'ECWMF Geo-Potential',
                        'url': url,
                        'query_date': time}

            # ancillary metadata tracking
            md = extract_ancillary_metadata(f)
            for key in md:
                metadata[key] = md[key]

            # internal file metadata (and reverse the ordering)
            df = read_metadata_tags(f, bands).iloc[::-1]
            df.insert(0, 'GeoPotential', data)
            df.insert(1, 'GeoPotential_Height', scaled_data)

            return df, metadata

    if data is None:
        raise AncillaryError("No ECWMF Geo-Potential profile data")
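# A worked sketch of the scaling performed above: ECMWF geo-potential is in
# m^2/s^2; dividing by standard gravity g0 = 9.80665 m/s^2 yields
# geo-potential height in metres, and a further division by 1000 gives km.
def _example_geopotential_to_height_km(geo_potential):
    g0 = 9.80665  # standard gravity, m/s^2
    return geo_potential / g0 / 1000.0

# e.g. a geo-potential of 98066.5 m^2/s^2 corresponds to a height of 10.0 km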
def ecwmf_relative_humidity(input_path, lonlat, time):
    """
    Retrieve a pixel value from the ECMWF Relative Humidity collection
    across 37 height pressure levels, for a given longitude,
    latitude and time.
    Reverses the order of the elements (1000 -> 1 mb, rather than
    1 -> 1000 mb) before returning.
    """
    product = DatasetName.RELATIVE_HUMIDITY.value.lower()
    search = pjoin(input_path, DatasetName.ECMWF_PATH_FMT.value)
    files = glob.glob(search.format(product=product, year=time.year))
    data = None
    required_ymd = datetime.datetime(time.year, time.month, time.day)
    for f in files:
        url = urlparse(f, scheme='file').geturl()
        ymd = splitext(basename(f))[0].split('_')[1]
        ancillary_ymd = datetime.datetime.strptime(ymd, '%Y-%m-%d')
        if ancillary_ymd == required_ymd:
            bands = list(range(1, 38))
            data = get_pixel(f, lonlat, bands)[::-1]

            metadata = {'data_source': 'ECWMF Relative Humidity',
                        'url': url,
                        'query_date': time}

            # file level metadata
            md = extract_ancillary_metadata(f)
            for key in md:
                metadata[key] = md[key]

            # internal file metadata (and reverse the ordering)
            df = read_metadata_tags(f, bands).iloc[::-1]
            df.insert(0, 'Relative_Humidity', data)

            return df, metadata

    if data is None:
        raise AncillaryError("No ECWMF Relative Humidity profile data")
def ecwmf_elevation(datafile, lonlat):
    """
    Retrieve a pixel from the ECMWF invariant geo-potential dataset.
    Converts to geo-potential height in KM, and adds 2 metres
    (0.002 km) to the result before returning.
    """
    try:
        data = get_pixel(datafile, lonlat) / 9.80665 / 1000.0 + 0.002
    except IndexError:
        raise AncillaryError("No Invariant Geo-Potential data")

    url = urlparse(datafile, scheme='file').geturl()

    metadata = {'data_source': 'ECWMF Invariant Geo-Potential',
                'url': url}

    # ancillary metadata tracking
    md = extract_ancillary_metadata(datafile)
    for key in md:
        metadata[key] = md[key]

    return data, metadata
def get_ozone_data(ozone_path, lonlat, time):
    """
    Get ozone data for a scene. `lonlat` should be the (x, y) for the
    centre of the scene.
    """
    filename = time.strftime('%b').lower() + '.tif'
    datafile = pjoin(ozone_path, filename)
    url = urlparse(datafile, scheme='file').geturl()

    try:
        data = get_pixel(datafile, lonlat)
    except IndexError:
        raise AncillaryError("No Ozone data")

    metadata = {'data_source': 'Ozone',
                'url': url,
                'query_date': time}

    # ancillary metadata tracking
    md = extract_ancillary_metadata(datafile)
    for key in md:
        metadata[key] = md[key]

    return data, metadata
def get_brdf_data(acquisition, brdf_primary_path, brdf_secondary_path,
                  compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Calculates the mean BRDF value for the given acquisition, for each
    BRDF parameter ['geo', 'iso', 'vol'] that covers the acquisition's
    extents.

    :param acquisition:
        An instance of an acquisitions object.

    :param brdf_primary_path:
        A string containing the full file system path to your directory
        containing the source BRDF files. The BRDF directories are
        assumed to follow the yyyy.mm.dd naming convention.

    :param brdf_secondary_path:
        A string containing the full file system path to your directory
        containing the Jupp-Li backup BRDF data. To be used for
        pre-MODIS and potentially post-MODIS acquisitions.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        A `dict` with the keys:

            * BrdfParameters.ISO
            * BrdfParameters.VOL
            * BrdfParameters.GEO

        Values for each BRDF Parameter are accessed via the key named
        `value`.

    :notes:
        The keywords compression and filter_opts aren't used as we no
        longer save the BRDF imagery. However, we may need to store
        tables in future, therefore they can remain until we know for
        sure they'll never be used.
    """
    def find_file(files, brdf_wl, parameter):
        """Find file with a specific name."""
        for f in files:
            if f.find(brdf_wl) != -1 and f.find(parameter) != -1:
                return f
        return None

    # Compute the geobox
    geobox = acquisition.gridded_geo_box()

    # Get the date of acquisition
    dt = acquisition.acquisition_datetime.date()

    # Compare the scene date and MODIS BRDF start date to select the
    # BRDF data root directory.
    # Scene dates outside the range of the CSIRO mosaic data
    # should use the pre-MODIS, Jupp-Li BRDF.
    brdf_dir_list = sorted(os.listdir(brdf_primary_path))

    try:
        brdf_dir_range = [brdf_dir_list[0], brdf_dir_list[-1]]
        brdf_range = [datetime.date(*[int(x) for x in y.split('.')])
                      for y in brdf_dir_range]

        use_jupp_li_brdf = (dt < brdf_range[0] or dt > brdf_range[1])
    except IndexError:
        use_jupp_li_brdf = True  # use Jupp-Li if no primary data available

    if use_jupp_li_brdf:
        brdf_base_dir = brdf_secondary_path
        brdf_dirs = get_brdf_dirs_pre_modis(brdf_base_dir, dt)
    else:
        brdf_base_dir = brdf_primary_path
        brdf_dirs = get_brdf_dirs_modis(brdf_base_dir, dt)

    # The following hdflist code was resurrected from the old SVN repo. JS
    # get all HDF files in the input dir
    db_dir = pjoin(brdf_base_dir, brdf_dirs)
    hdflist = []
    hdfhome = None
    for (hdfhome, _, filelist) in os.walk(db_dir):
        for f in filelist:
            if f.endswith(".hdf.gz") or f.endswith(".hdf"):
                hdflist.append(f)

    results = {}
    for param in BrdfParameters:
        hdf_fname = find_file(hdflist, acquisition.brdf_wavelength,
                              param.name.lower())

        hdf_path = pjoin(hdfhome, hdf_fname)

        # Test if the file exists and has correct permissions
        try:
            with open(hdf_path, 'rb'):
                pass
        except IOError:
            print("Unable to open file %s" % hdf_path)

        with tempfile.TemporaryDirectory() as tmpdir:
            # gunzip into the temporary directory if we need to
            if hdf_path.endswith(".hdf.gz"):
                hdf_file = pjoin(tmpdir,
                                 re.sub(".hdf.gz", ".hdf",
                                        basename(hdf_path)))
                cmd = "gunzip -c %s > %s" % (hdf_path, hdf_file)
                subprocess.check_call(cmd, shell=True)
            else:
                hdf_file = hdf_path

            # Load the file
            brdf_object = BRDFLoader(hdf_file, ul=geobox.ul_lonlat,
                                     lr=geobox.lr_lonlat)

            # guard against ROIs that don't intersect
            if not brdf_object.intersects:
                msg = "ROI is outside the BRDF extents!"
                log.error(msg)
                raise Exception(msg)

            # calculate the mean value
            brdf_mean_value = brdf_object.mean_data_value()

        # Add the BRDF filename and mean value to the results dict
        url = urlparse(hdf_path, scheme='file').geturl()
        res = {'data_source': 'BRDF',
               'url': url,
               'value': brdf_mean_value}

        # ancillary metadata tracking
        md = extract_ancillary_metadata(hdf_path)
        for key in md:
            res[key] = md[key]

        results[param] = res

    # check for no BRDF, i.e. (iso, vol, geo) == (0, 0, 0), and convert
    # to (1, 0, 0), stripping any file level metadata in the process
    if all([v['value'] == 0 for _, v in results.items()]):
        results[BrdfParameters.ISO] = {'value': 1.0}
        results[BrdfParameters.VOL] = {'value': 0.0}
        results[BrdfParameters.GEO] = {'value': 0.0}

    # add very basic BRDF description metadata and the roi polygon
    for param in BrdfParameters:
        results[param]['extents'] = wkt.dumps(brdf_object.roi_polygon)

    return results
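# A hedged usage sketch for get_brdf_data; the acquisition object and both
# directory paths are placeholders, not real inputs.
#
#   results = get_brdf_data(acq,
#                           '/path/to/modis_brdf',    # yyyy.mm.dd subdirs
#                           '/path/to/jupp_li_brdf')  # pre-MODIS fallback
#   iso = results[BrdfParameters.ISO]['value']
#   vol = results[BrdfParameters.VOL]['value']
#   geo = results[BrdfParameters.GEO]['value']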
def get_aerosol_data(acquisition, aerosol_dict):
    """
    Extract the aerosol value for an acquisition.
    This version (2) retrieves the data from an HDF5 file, and provides
    more control over how the data is selected geometrically, as well
    as better control over timedeltas.
    """
    dt = acquisition.acquisition_datetime
    geobox = acquisition.gridded_geo_box()
    roi_poly = Polygon([geobox.ul_lonlat, geobox.ur_lonlat,
                        geobox.lr_lonlat, geobox.ll_lonlat])

    descr = ['AATSR_PIX', 'AATSR_CMP_YEAR_MONTH', 'AATSR_CMP_MONTH']
    names = ['ATSR_LF_%Y%m', 'aot_mean_%b_%Y_All_Aerosols',
             'aot_mean_%b_All_Aerosols']
    exts = ['/pix', '/cmp', '/cmp']
    pathnames = [ppjoin(ext, dt.strftime(n)) for ext, n in zip(exts, names)]

    # temporary until we sort out a better default mechanism
    # how do we want to support default values, whilst still supporting
    # provenance
    if 'user' in aerosol_dict:
        metadata = {'data_source': 'User defined value'}
        return aerosol_dict['user'], metadata
    else:
        aerosol_fname = aerosol_dict['pathname']

    fid = h5py.File(aerosol_fname, 'r')
    url = urlparse(aerosol_fname, scheme='file').geturl()

    delta_tolerance = datetime.timedelta(days=0.5)

    data = None
    for pathname, description in zip(pathnames, descr):
        if pathname in fid:
            df = read_h5_table(fid, pathname)
            aerosol_poly = wkt.loads(fid[pathname].attrs['extents'])

            if aerosol_poly.intersects(roi_poly):
                if description == 'AATSR_PIX':
                    abs_diff = (df['timestamp'] - dt).abs()
                    df = df[abs_diff < delta_tolerance]
                    df.reset_index(inplace=True, drop=True)
                    if df.shape[0] == 0:
                        continue

                intersection = aerosol_poly.intersection(roi_poly)
                pts = GeoSeries([Point(x, y) for x, y in
                                 zip(df['lon'], df['lat'])])
                idx = pts.within(intersection)
                data = df[idx]['aerosol'].mean()

                if numpy.isfinite(data):
                    metadata = {'data_source': description,
                                'dataset_pathname': pathname,
                                'query_date': dt,
                                'url': url,
                                'extents': wkt.dumps(intersection)}

                    # ancillary metadata tracking
                    md = extract_ancillary_metadata(aerosol_fname)
                    for key in md:
                        metadata[key] = md[key]

                    fid.close()
                    return data, metadata

    # now we officially support a default value of 0.05, which should
    # make the following redundant ...

    # default aerosol value
    # assumes we are only processing Australia, in which case it should
    # be a coastal scene
    data = 0.06
    metadata = {'data_source': 'Default value used; Assumed a coastal scene'}

    fid.close()
    return data, metadata
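# A minimal, self-contained sketch of the AATSR_PIX temporal filter above:
# keep only rows whose timestamp falls within +/- half a day of the
# acquisition datetime. The sample values are fabricated for illustration.
def _example_temporal_filter():
    import pandas
    dt = datetime.datetime(2018, 3, 21, 0, 30)
    df = pandas.DataFrame({
        'timestamp': [dt - datetime.timedelta(hours=2),   # kept
                      dt + datetime.timedelta(days=2)],   # filtered out
        'aerosol': [0.04, 0.09],
    })
    abs_diff = (df['timestamp'] - dt).abs()
    df = df[abs_diff < datetime.timedelta(days=0.5)]
    return df.reset_index(drop=True)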
def calc_land_sea_mask(geo_box,
                       ancillary_path='/g/data/v10/eoancillarydata/Land_Sea_Rasters'):
    """
    Creates a Land/Sea mask.

    :param geo_box:
        An instance of GriddedGeoBox defining the region for which the
        land/sea mask is required.

            * WARNING: geo_box.crs must be UTM!!!

    :param ancillary_path:
        The path to the directory containing the land/sea data files.

    :return:
        A tuple of (mask, metadata), where mask is a 2D Numpy Boolean
        array; True = Land, False = Sea.

    :note:
        The function does not currently support reprojections. The
        GriddedGeoBox must have CRS and pixel size matching the
        ancillary data GeoTiffs.

    :TODO:
        Support reprojection to any arbitrary GriddedGeoBox.
    """
    def img2map(geoTransform, pixel):
        """
        Converts a pixel (image) co-ordinate into a map co-ordinate.

        :param geoTransform:
            The image co-ordinate information (upper left coords,
            offset and pixel sizes).

        :param pixel:
            A tuple containing the y and x image co-ordinates.

        :return:
            A tuple containing the x and y map co-ordinates.
        """
        mapx = pixel[1] * geoTransform[1] + geoTransform[0]
        mapy = geoTransform[3] - (pixel[0] * (numpy.abs(geoTransform[5])))
        return (mapx, mapy)

    def map2img(geoTransform, location):
        """
        Converts a map co-ordinate into a pixel (image) co-ordinate.

        :param geoTransform:
            The image co-ordinate information (upper left coords,
            offset and pixel sizes).

        :param location:
            A tuple containing the x and y map co-ordinates.

        :return:
            A tuple containing the y and x image co-ordinates.
        """
        imgx = int(numpy.round((location[0] - geoTransform[0]) /
                               geoTransform[1]))
        imgy = int(numpy.round((geoTransform[3] - location[1]) /
                               numpy.abs(geoTransform[5])))
        return (imgy, imgx)

    # get Land/Sea data file for this bounding box
    utm_zone = geo_box.crs.GetUTMZone()
    rasfile = os.path.join(ancillary_path, 'WORLDzone%02d.tif' % abs(utm_zone))
    assert os.path.exists(rasfile), \
        'ERROR: Raster File Not Found (%s)' % rasfile

    md = extract_ancillary_metadata(rasfile)
    md['data_source'] = 'Rasterised Land/Sea Mask'
    md['data_file'] = rasfile
    metadata = {'land_sea_mask': md}

    geoTransform = geo_box.transform.to_gdal()
    if geoTransform is None:
        raise Exception('Image geotransformation info is needed')

    dims = geo_box.shape

    lsobj = gdal.Open(rasfile, gdal.gdalconst.GA_ReadOnly)
    ls_geoT = lsobj.GetGeoTransform()

    # Convert the image's image co-ords into map co-ords
    mUL = img2map(geoTransform=geoTransform, pixel=(0, 0))
    mLR = img2map(geoTransform=geoTransform, pixel=(dims[0], dims[1]))

    # Convert the map co-ords into the rasfile image co-ords
    iUL = map2img(geoTransform=ls_geoT, location=mUL)
    iLR = map2img(geoTransform=ls_geoT, location=mLR)

    xoff = iUL[1]
    yoff = iUL[0]
    xsize = iLR[1] - xoff
    ysize = iLR[0] - yoff

    # Read in the land/sea array
    ls_arr = lsobj.ReadAsArray(xoff, yoff, xsize, ysize)

    return (ls_arr.astype('bool'), metadata)
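# A small sketch checking that img2map and map2img above are inverses for
# pixel-aligned inputs, using a hypothetical 25 m UTM geotransform
# (GDAL ordering: ulx, xres, xskew, uly, yskew, yres).
def _example_round_trip(pixel=(50, 100)):
    geo_t = (600000.0, 25.0, 0.0, 7000000.0, 0.0, -25.0)
    # img2map: pixel (y, x) -> map (x, y)
    mapx = pixel[1] * geo_t[1] + geo_t[0]
    mapy = geo_t[3] - pixel[0] * abs(geo_t[5])
    # map2img: map (x, y) -> pixel (y, x)
    imgx = int(round((mapx - geo_t[0]) / geo_t[1]))
    imgy = int(round((geo_t[3] - mapy) / abs(geo_t[5])))
    assert (imgy, imgx) == pixel
    return (imgy, imgx)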