Example #1
def test_geom_clone():
    b = geometry.box(0, 0, 10, 20, epsg4326)
    assert b == b.clone()
    assert b.geom is not b.clone().geom

    assert b == geometry.Geometry(b)
    assert b.geom is not geometry.Geometry(b).geom
Example #2
    def from_vector(self, vector_data):
        """Get the geobox to use for the grid.

        Parameters
        ----------
        vector_data: str or :obj:`geopandas.GeoDataFrame`
            A file path to an OGR supported source or GeoDataFrame
            containing the vector data.

        Returns
        -------
        :obj:`datacube.utils.geometry.GeoBox`
            The geobox for the grid to be generated from the vector data.

        """
        vector_data = load_vector_data(vector_data)

        if self.like is not None:
            assert (self.output_crs is
                    None), "'like' and 'output_crs' are not supported together"
            assert (self.resolution is
                    None), "'like' and 'resolution' are not supported together"
            assert self.align is None, "'like' and 'align' are not supported together"
            try:
                geobox = self.like.geobox
            except AttributeError:
                geobox = geobox_from_rio(self.like)
            return geobox

        if self.resolution is None:
            raise RuntimeError(
                "Must specify 'resolution' if 'like' not specified.")

        if self.output_crs:
            crs = geometry.CRS(self.output_crs)
        else:
            crs = geometry.CRS(crs_to_wkt(CRS.from_user_input(
                vector_data.crs)))

        if self.geom is None and self.output_crs:
            geopoly = geometry.Geometry(
                mapping(
                    box(*vector_data.to_crs(
                        crs._crs.ExportToProj4()).total_bounds)),
                crs=crs,
            )
        elif self.geom is None:
            geopoly = geometry.Geometry(mapping(
                box(*vector_data.total_bounds)),
                                        crs=crs)

        else:
            geom_json = json.loads(self.geom)
            geom_crs = geometry.CRS(
                "+init={}".format(geom_json["crs"]["properties"]["name"].lower(
                ) if "crs" in geom_json else "epsg:4326"))
            geopoly = geometry.Geometry(geom_json, crs=geom_crs)

        return geometry.GeoBox.from_geopolygon(geopoly, self.resolution, crs,
                                               self.align)
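The same geobox construction can be reproduced on its own. A minimal sketch, assuming datacube and geopandas are available; the file name and resolution are placeholder values:

import geopandas as gpd
from shapely.geometry import box, mapping
from datacube.utils import geometry

vector_data = gpd.read_file("roads.shp")               # hypothetical vector file
crs = geometry.CRS(vector_data.crs.to_wkt())           # CRS of the vector data
geopoly = geometry.Geometry(mapping(box(*vector_data.total_bounds)), crs=crs)
# resolution is (y, x); the value below assumes a geographic CRS
geobox = geometry.GeoBox.from_geopolygon(geopoly, resolution=(-0.001, 0.001), crs=crs)
print(geobox.shape, geobox.affine)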
Example #3
def test_3d_point_converted_to_2d_point():
    point = (-35.5029340, 145.9312455, 0.0)

    point_3d = {'coordinates': point, 'type': 'Point'}
    point_2d = {'coordinates': (point[0], point[1]), 'type': 'Point'}

    p_2d = geometry.Geometry(point_2d)
    p_3d = geometry.Geometry(point_3d)

    assert len(p_3d.coords[0]) == 2

    assert p_2d == p_3d
Example #4
def test_lonlat_bounds():
    # example from landsat scene: spans lon=180
    poly = geometry.box(618300, -1876800, 849000, -1642500, 'EPSG:32660')

    bb = geometry.lonlat_bounds(poly)
    assert bb.left < 180 < bb.right
    assert geometry.lonlat_bounds(poly) == geometry.lonlat_bounds(poly, resolution=1e+8)

    bb = geometry.lonlat_bounds(poly, mode='quick')
    assert bb.right - bb.left > 180

    poly = geometry.box(1, -10, 2, 20, 'EPSG:4326')
    assert geometry.lonlat_bounds(poly) == poly.boundingbox

    with pytest.raises(ValueError):
        geometry.lonlat_bounds(geometry.box(0, 0, 1, 1, None))

    multi = {
        "type": "MultiPolygon",
        "coordinates": [
            [[[174, 52], [174, 53], [175, 53], [174, 52]]],
            [[[168, 54], [167, 55], [167, 54], [168, 54]]]
        ]
    }

    multi_geom = geometry.Geometry(multi, "epsg:4326")
    multi_geom_projected = multi_geom.to_crs('epsg:32659', math.inf)

    ll_bounds = geometry.lonlat_bounds(multi_geom)
    ll_bounds_projected = geometry.lonlat_bounds(multi_geom_projected)

    assert ll_bounds == approx(ll_bounds_projected)
Example #5
def open_polygon_from_shapefile(shapefile, index_of_polygon_within_shapefile=0):

    '''This function takes a shapefile, selects the polygon at the index you specify,
    and uses the datacube geometry object, along with shapely.geometry and fiona, to
    get the geom for the datacube query. It will also make sure you have the correct
    crs object for the DEA.

    Last modified: May 2018
    Author: Bex Dunn'''

    # open all the shapes within the shape file
    shapes = fiona.open(shapefile)
    i = index_of_polygon_within_shapefile
    #print('shapefile index is '+str(i))
    if i >= len(shapes):
        print('index '+str(i)+' is out of range for the shapefile ('+str(len(shapes))+' shapes)')
        sys.exit(0)
    #copy attributes from shapefile and define shape_name
    geom_crs = geometry.CRS(shapes.crs_wkt)
    geo = shapes[i]['geometry']
    geom = geometry.Geometry(geo, crs=geom_crs)
    geom_bs = shapely.geometry.shape(shapes[i]['geometry'])
    shape_name = shapefile.split('/')[-1].split('.')[0]+'_'+str(i)
    #print('the name of your shape is '+shape_name)
    #get your polygon out as a geom to go into the query, and the shape name for file names later
    return geom, shape_name          
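A brief usage sketch for the helper above; the shapefile path, product name and measurements are placeholders:

import datacube

geom, shape_name = open_polygon_from_shapefile('catchments.shp',
                                               index_of_polygon_within_shapefile=2)
dc = datacube.Datacube(app='polygon-query')
data = dc.load(product='ls8_nbart_albers',              # hypothetical product name
               measurements=['red', 'green', 'blue'],
               time=('2018-01-01', '2018-12-31'),
               geopolygon=geom)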
Example #6
    def extent(self) -> Optional[geometry.Geometry]:
        """ :returns: valid extent of the dataset or None
        """
        def xytuple(obj):
            return obj['x'], obj['y']

        # If there is no projection or crs, the dataset has no extent.
        projection = self._gs
        if not projection:
            return None
        crs = self.crs
        if not crs:
            _LOG.debug("No CRS, assuming no extent (dataset %s)", self.id)
            return None

        valid_data = projection.get('valid_data')
        geo_ref_points = projection.get('geo_ref_points')
        if valid_data:
            return geometry.Geometry(valid_data, crs=crs)
        elif geo_ref_points:
            return geometry.polygon([
                xytuple(geo_ref_points[key])
                for key in ('ll', 'ul', 'ur', 'lr', 'll')
            ],
                                    crs=crs)

        return None
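The grid-spatial metadata this property reads usually looks like the dictionary below; a sketch, with illustrative values, of building the same fallback polygon directly from 'geo_ref_points':

from datacube.utils import geometry

# hypothetical grid_spatial metadata for a dataset (values are illustrative)
projection = {
    'geo_ref_points': {
        'll': {'x': 300000.0, 'y': 7190000.0},
        'ul': {'x': 300000.0, 'y': 7200000.0},
        'ur': {'x': 310000.0, 'y': 7200000.0},
        'lr': {'x': 310000.0, 'y': 7190000.0},
    },
}
crs = geometry.CRS('EPSG:32655')
extent = geometry.polygon(
    [(projection['geo_ref_points'][key]['x'],
      projection['geo_ref_points'][key]['y'])
     for key in ('ll', 'ul', 'ur', 'lr', 'll')],
    crs=crs)
print(extent.boundingbox)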
Example #7
def test_3d_geometry_converted_to_2d_geometry():
    coordinates = [(115.8929714190001, -28.577007674999948, 0.0),
                   (115.90275429200005, -28.57698532699993, 0.0),
                   (115.90412631000004, -28.577577566999935, 0.0),
                   (115.90157040700001, -28.58521105999995, 0.0),
                   (115.89382838900008, -28.585473711999953, 0.0),
                   (115.8929714190001, -28.577007674999948, 0.0)]
    geom_3d = {'coordinates': [coordinates],
               'type': 'Polygon'}
    geom_2d = {'coordinates': [[(x, y) for x, y, z in coordinates]],
               'type': 'Polygon'}

    g_2d = geometry.Geometry(geom_2d)
    g_3d = geometry.Geometry(geom_3d)

    assert {2} == set(len(pt) for pt in g_3d.boundary.coords)  # All coordinates are 2D

    assert g_2d == g_3d  # 3D geometry has been converted to a 2D by dropping the Z axis
Example #8
def create_long_SHP_arrays(shape_file, feature_id):

    feature = feature_from_shapefile(shape_file, feature_id)

    with fiona.open(shape_file) as shapes:
        crs = geometry.CRS(shapes.crs_wkt)

    geom = geometry.Geometry(feature['geometry'], crs=crs)

    dc = datacube.Datacube()

    query = {
        'time': ('2013-01-01', '2118-12-31'),
        'geopolygon': geom,
        'output_crs': 'EPSG:3577',
        'resampling': 'bilinear',
        'group_by': 'solar_day',
    }

    sb_names = [
        'nbart_coastal_aerosol', 'nbart_blue', 'nbart_green', 'nbart_red',
        'nbart_red_edge_1', 'nbart_red_edge_2', 'nbart_red_edge_3',
        'nbart_nir_1', 'nbart_nir_2', 'nbart_swir_2', 'nbart_swir_3', 'fmask'
    ]

    s2a_array = dc.load(product='s2a_ard_granule',
                        measurements=sb_names,
                        resolution=(-10, 10),
                        **query)
    s2b_array = dc.load(product='s2b_ard_granule',
                        measurements=sb_names,
                        resolution=(-10, 10),
                        **query)

    ls8_array = dc.load(product='ls8_nbart_scene',
                        resolution=(-30, 30),
                        **query)

    ls8_array = ls8_array.rename({
        '1': 'coastal_aerosol',
        '2': 'blue',
        '3': 'green',
        '4': 'red',
        '5': 'nir',
        '6': 'swir1',
        '7': 'swir2'
    })

    lmask = geometry_mask([geom], ls8_array.geobox, invert=True)
    ls8_array = ls8_array.where(lmask)

    smask = geometry_mask([geom], s2a_array.geobox, invert=True)
    s2a_array = s2a_array.where(smask)
    s2b_array = s2b_array.where(smask)

    return ls8_array, s2a_array, s2b_array
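A one-line usage sketch (the shapefile name and feature index are placeholders):

ls8_array, s2a_array, s2b_array = create_long_SHP_arrays('sites.shp', feature_id=0)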
Example #9
def _getData(shape, product, crs):
    dc = datacube.Datacube()
    dc_crs = datacube.utils.geometry.CRS(crs)
    g = geometry.Geometry(shape, crs=dc_crs)
    query = {'geopolygon': g}
    data = dc.load(product=product, **query)

    # # mask if polygon
    # mask = geometry_mask([g], data.geobox, invert=True)
    # masked = data.where(mask)

    return data
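A usage sketch; the GeoJSON polygon, product name and CRS are placeholders:

shape = {
    'type': 'Polygon',
    'coordinates': [[(149.0, -35.0), (149.1, -35.0), (149.1, -35.1),
                     (149.0, -35.1), (149.0, -35.0)]],
}
data = _getData(shape, product='ls8_nbart_albers', crs='EPSG:4326')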
Example #10
def main():
    with fiona.open('line.shp') as shapes:
        crs = geometry.CRS(shapes.crs_wkt)
        first_geometry = next(shapes)['geometry']
        line = geometry.Geometry(first_geometry, crs=crs)

    query = {'time': ('1990-01-01', '1991-01-01'), 'geopolygon': line}

    dc = datacube.Datacube(app='line-trans-recipe')
    data = dc.load(product='ls5_nbar_albers', measurements=['red'], **query)

    trans = transect(data, line, abs(data.affine.a))
    trans.red.plot(x='distance', y='time')
Example #11
def main():
    shape_file = 'my_shape_file.shp'
    with fiona.open(shape_file) as shapes:
        crs = geometry.CRS(shapes.crs_wkt)
        first_geometry = next(iter(shapes))['geometry']
        geom = geometry.Geometry(first_geometry, crs=crs)

    query = {'time': ('1990-01-01', '1991-01-01'), 'geopolygon': geom}

    dc = datacube.Datacube(app='poly-drill-recipe')
    data = dc.load(product='ls5_nbar_albers', measurements=['red'], **query)

    mask = geometry_mask([geom], data.geobox, invert=True)
    data = data.where(mask)

    data.red.plot.imshow(col='time', col_wrap=5)
Example #12
    def extent(self):
        """
        :rtype: geometry.Geometry
        """

        def xytuple(obj):
            return obj['x'], obj['y']

        projection = self.metadata.grid_spatial

        if 'valid_data' in projection:
            return geometry.Geometry(projection['valid_data'], crs=self.crs)
        else:
            geo_ref_points = projection['geo_ref_points']
            return geometry.polygon([xytuple(geo_ref_points[key]) for key in ('ll', 'ul', 'ur', 'lr', 'll')],
                                    crs=self.crs)
Example #13
def test_props():
    crs = epsg4326

    box1 = geometry.box(10, 10, 30, 30, crs=crs)
    assert box1
    assert box1.is_valid
    assert not box1.is_empty
    assert box1.area == 400.0
    assert box1.boundary.length == 80.0
    assert box1.centroid == geometry.point(20, 20, crs)

    triangle = geometry.polygon([(10, 20), (20, 20), (20, 10), (10, 20)],
                                crs=crs)
    assert triangle.boundingbox == geometry.BoundingBox(10, 10, 20, 20)
    assert triangle.envelope.contains(triangle)

    assert box1.length == 80.0

    box1copy = geometry.box(10, 10, 30, 30, crs=crs)
    assert box1 == box1copy
    assert box1.convex_hull == box1copy  # NOTE: this might fail because of point order

    box2 = geometry.box(20, 10, 40, 30, crs=crs)
    assert box1 != box2

    bbox = geometry.BoundingBox(1, 0, 10, 13)
    assert bbox.width == 9
    assert bbox.height == 13
    assert bbox.points == [(1, 0), (1, 13), (10, 0), (10, 13)]

    assert bbox.transform(Affine.identity()) == bbox
    assert bbox.transform(Affine.translation(1, 2)) == geometry.BoundingBox(
        2, 2, 11, 15)

    pt = geometry.point(3, 4, crs)
    assert pt.json['coordinates'] == (3.0, 4.0)
    assert 'Point' in str(pt)
    assert bool(pt) is True
    assert pt.__nonzero__() is True

    # check "CRS as string is converted to class automatically"
    assert isinstance(geometry.point(3, 4, 'epsg:3857').crs, geometry.CRS)

    # constructor with bad input should raise ValueError
    with pytest.raises(ValueError):
        geometry.Geometry(object())
Example #14
def make_long_SHP_query(shape_file):

    with fiona.open(shape_file) as shapes:
        crs = geometry.CRS(shapes.crs_wkt)
        first_geometry = next(iter(shapes))['geometry']
        geom = geometry.Geometry(first_geometry, crs=crs)

    dc = datacube.Datacube()

    query = {
        'time': ('2013-01-01', '2118-12-31'),
        'geopolygon': geom,
        'output_crs': 'EPSG:3577',
        'resampling': 'bilinear',
        'group_by': 'solar_day',
    }

    return dc, query, geom
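A usage sketch showing how the returned handles feed a load; the shapefile, product and measurement names are placeholders:

dc, query, geom = make_long_SHP_query('sites.shp')
data = dc.load(product='ga_ls8c_ard_3',                  # hypothetical product name
               measurements=['nbart_red', 'nbart_green', 'nbart_blue'],
               resolution=(-30, 30),
               **query)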
Example #15
def get_data_opensource_shapefile(prod_info, acq_min, acq_max, shapefile,
                                  no_partial_scenes):

    datacube_config = prod_info[0]
    source_prod = prod_info[1]
    source_band_list = prod_info[2]
    mask_band = prod_info[3]

    if datacube_config != 'default':
        remotedc = Datacube(config=datacube_config)
    else:
        remotedc = Datacube()

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        with fiona.open(shapefile) as shapes:
            crs = geometry.CRS(shapes.crs_wkt)
            first_geometry = next(iter(shapes))['geometry']
            geom = geometry.Geometry(first_geometry, crs=crs)

            return_data = {}
            data = xr.Dataset()

            if source_prod != '':
                # get a sample dataset to decide the target epsg
                fd_query = {'time': (acq_min, acq_max), 'geopolygon': geom}
                sample_fd_ds = remotedc.find_datasets(product=source_prod,
                                                      group_by='solar_day',
                                                      **fd_query)

                if (len(sample_fd_ds)) > 0:
                    # decide pixel size for output data
                    pixel_x, pixel_y = get_pixel_size(sample_fd_ds[0],
                                                      source_band_list)
                    log.info(
                        'Output pixel size for product {}: x={}, y={}'.format(
                            source_prod, pixel_x, pixel_y))

                    # get target epsg from metadata
                    target_epsg = get_epsg(sample_fd_ds[0])
                    log.info('CRS for product {}: {}'.format(
                        source_prod, target_epsg))

                    query = {
                        'time': (acq_min, acq_max),
                        'geopolygon': geom,
                        'output_crs': target_epsg,
                        'resolution': (-pixel_y, pixel_x),
                        'measurements': source_band_list
                    }

                    if 's2' in source_prod:
                        data = remotedc.load(product=source_prod,
                                             group_by='solar_day',
                                             **query)
                    else:
                        data = remotedc.load(product=source_prod,
                                             align=(pixel_x / 2.0,
                                                    pixel_y / 2.0),
                                             group_by='solar_day',
                                             **query)

                    # remove cloud and nodata
                    data = remove_cloud_nodata(source_prod, data, mask_band)

                    if data.data_vars:
                        mask = geometry_mask([geom], data.geobox, invert=True)
                        data = data.where(mask)

                    if no_partial_scenes:
                        # calculate valid data percentage
                        data = only_return_whole_scene(data)

                return_data = {
                    source_prod: {
                        'data': data,
                        'mask_band': mask_band,
                        'find_list': sample_fd_ds
                    }
                }

    return return_data
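A usage sketch for the function above; the product, band and file names are placeholders following the (config, product, band list, mask band) layout unpacked at the top of the function:

prod_info = ('default',                                  # datacube config
             's2a_ard_granule',                          # hypothetical source product
             ['nbart_red', 'nbart_green', 'nbart_blue'], # bands to load
             'fmask')                                    # mask band
result = get_data_opensource_shapefile(prod_info,
                                       acq_min='2019-01-01',
                                       acq_max='2019-12-31',
                                       shapefile='aoi.shp',
                                       no_partial_scenes=True)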
Example #16
def test_ops():
    box1 = geometry.box(10, 10, 30, 30, crs=epsg4326)
    box2 = geometry.box(20, 10, 40, 30, crs=epsg4326)
    box3 = geometry.box(20, 10, 40, 30, crs=epsg4326)
    box4 = geometry.box(40, 10, 60, 30, crs=epsg4326)
    no_box = None

    assert box1 != box2
    assert box2 == box3
    assert box3 != no_box

    union1 = box1.union(box2)
    assert union1.area == 600.0

    with pytest.raises(geometry.CRSMismatchError):
        box1.union(box2.to_crs(epsg3857))

    inter1 = box1.intersection(box2)
    assert bool(inter1)
    assert inter1.area == 200.0

    inter2 = box1.intersection(box4)
    assert not bool(inter2)
    assert inter2.is_empty
    # assert not inter2.is_valid  TODO: what's going on here?

    diff1 = box1.difference(box2)
    assert diff1.area == 200.0

    symdiff1 = box1.symmetric_difference(box2)
    assert symdiff1.area == 400.0

    # test segmented
    line = geometry.line([(0, 0), (0, 5), (10, 5)], epsg4326)
    line2 = line.segmented(2)
    assert line.crs is line2.crs
    assert line.length == line2.length
    assert len(line.coords) < len(line2.coords)
    poly = geometry.polygon([(0, 0), (0, 5), (10, 5)], epsg4326)
    poly2 = poly.segmented(2)
    assert poly.crs is poly2.crs
    assert poly.length == poly2.length
    assert poly.area == poly2.area
    assert len(poly.geom.exterior.coords) < len(poly2.geom.exterior.coords)

    poly2 = poly.exterior.segmented(2)
    assert poly.crs is poly2.crs
    assert poly.length == poly2.length
    assert len(poly.geom.exterior.coords) < len(poly2.geom.coords)

    # test interpolate
    pt = line.interpolate(1)
    assert pt.crs is line.crs
    assert pt.coords[0] == (0, 1)
    assert isinstance(pt.coords, list)

    with pytest.raises(TypeError):
        pt.interpolate(3)

    # test array interface
    assert line.__array_interface__ is not None
    assert np.array(line).shape == (3, 2)

    # test simplify
    poly = geometry.polygon([(0, 0), (0, 5), (10, 5)], epsg4326)
    assert poly.simplify(100) == poly

    # test iteration
    poly_2_parts = geometry.Geometry(
        {
            "type":
            "MultiPolygon",
            "coordinates": [[[[102.0, 2.0], [103.0, 2.0], [103.0, 3.0],
                              [102.0, 3.0], [102.0, 2.0]]],
                            [[[100.0, 0.0], [101.0, 0.0], [101.0, 1.0],
                              [100.0, 1.0], [100.0, 0.0]],
                             [[100.2, 0.2], [100.8, 0.2], [100.8, 0.8],
                              [100.2, 0.8], [100.2, 0.2]]]]
        }, 'EPSG:4326')
    pp = list(poly_2_parts)
    assert len(pp) == 2
    assert all(p.crs == poly_2_parts.crs for p in pp)

    # test transform
    assert geometry.point(
        0, 0,
        epsg4326).transform(lambda x, y: (x + 1, y + 2)) == geometry.point(
            1, 2, epsg4326)

    # test sides
    box = geometry.box(1, 2, 11, 22, epsg4326)
    ll = list(geometry.sides(box))
    assert all(l.crs is epsg4326 for l in ll)
    assert len(ll) == 4
    assert ll[0] == geometry.line([(1, 2), (1, 22)], epsg4326)
    assert ll[1] == geometry.line([(1, 22), (11, 22)], epsg4326)
    assert ll[2] == geometry.line([(11, 22), (11, 2)], epsg4326)
    assert ll[3] == geometry.line([(11, 2), (1, 2)], epsg4326)
Example #17
def get_training_data_for_shp(gdf,
                              index,
                              row,
                              out_arrs,
                              out_vars,
                              products,
                              dc_query,
                              custom_func=None,
                              field=None,
                              calc_indices=None,
                              reduce_func=None,
                              drop=True,
                              zonal_stats=None):
    """
    Function to extract data from the ODC for training a machine learning classifier 
    using a geopandas geodataframe of labelled geometries. 
    This function provides a number of pre-defined methods for producing training data, 
    including calculating band indices, reducing time series using several summary statistics,
    and/or generating zonal statistics across polygons.  The 'custom_func' parameter provides 
    a method for the user to supply a custom function for generating features rather than using the
    pre-defined methods.

    Parameters
    ----------
    gdf : geopandas geodataframe
        geometry data in the form of a geopandas geodataframe
    products : list
        a list of products to load from the datacube. 
        e.g. ['ls8_usgs_sr_scene', 'ls7_usgs_sr_scene']
    dc_query : dictionary
        Datacube query object, should not contain lat and long (x or y)
        variables as these are supplied by the 'gdf' variable
    field : string 
        A string containing the name of column with class labels. 
        Field must contain numeric values.
    out_arrs : list 
        An empty list into which the training data arrays are stored.
    out_vars : list 
        An empty list into which the data variable names are stored.
    custom_func : function, optional 
        A custom function for generating feature layers. If this parameter
        is set, all other options (excluding 'zonal_stats'), will be ignored.
        The result of the 'custom_func' must be a single xarray dataset 
        containing 2D coordinates (i.e x, y - no time dimension). The custom function
        has access to the datacube dataset extracted using the 'dc_query' params,
        along with access to the 'dc_query' dictionary itself, which could be used
        to load other products besides those specified under 'products'.
    calc_indices: list, optional
        If not using a custom func, then this parameter provides a method for
        calculating a number of remote sensing indices (e.g. `['NDWI', 'NDVI']`).
    reduce_func : string, optional 
        Function to reduce the data from multiple time steps to
        a single timestep. Options are 'mean', 'median', 'std',
        'max', 'min', 'geomedian'.  Ignored if 'custom_func' is provided.
    drop : boolean, optional
        If this variable is set to True, and 'calc_indices' are supplied, the
        spectral bands will be dropped from the dataset leaving only the
        band indices as data variables in the dataset. Default is True.
    zonal_stats : string, optional
        An optional string giving the names of zonal statistics to calculate 
        for each polygon. Default is None (all pixel values are returned). Supported 
        values are 'mean', 'median', 'max', 'min', and 'std'. Will work in 
        conjunction with a 'custom_func'.


    Returns
    --------
    Two lists, a list of numpy.arrays containing classes and extracted data for 
    each pixel or polygon, and another containing the data variable names.

    """

    # prevent function altering dictionary kwargs
    dc_query = deepcopy(dc_query)

    # remove dask chunks if supplied, as multiprocessing
    # is used for parallelization
    if 'dask_chunks' in dc_query.keys():
        dc_query.pop('dask_chunks', None)

    # connect to datacube
    dc = datacube.Datacube(app='training_data')

    # set up query based on polygon (convert to WGS84)
    geom = geometry.Geometry(gdf.geometry.values[index].__geo_interface__,
                             geometry.CRS('epsg:4326'))

    # print(geom)
    q = {"geopolygon": geom}

    # merge polygon query with user supplied query params
    dc_query.update(q)

    # Identify the most common projection system in the input query
    output_crs = mostcommon_crs(dc=dc, product=products, query=dc_query)

    # load_ard doesn't handle geomedians
    # TODO: Add support for other sensors
    if 'ga_ls8c_gm_2_annual' in products:
        ds = dc.load(product='ga_ls8c_gm_2_annual', **dc_query)
        ds = ds.where(ds != 0, np.nan)

    else:
        # load data
        with HiddenPrints():
            ds = load_ard(dc=dc,
                          products=products,
                          output_crs=output_crs,
                          **dc_query)

    # create polygon mask
    with HiddenPrints():
        mask = xr_rasterize(gdf.iloc[[index]], ds)

    # mask dataset
    ds = ds.where(mask)

    # Use custom function for training data if it exists
    if custom_func is not None:
        with HiddenPrints():
            data = custom_func(ds)

    else:
        # first check enough variables are set to run functions
        if (len(ds.time.values) > 1) and (reduce_func is None):
            raise ValueError(
                "Your dataset has " + str(len(ds.time.values)) +
                " time-steps, please provide a reduction function," +
                " e.g. reduce_func='mean'")

        if calc_indices is not None:
            # determine which collection is being loaded
            if 'level2' in products[0]:
                collection = 'c2'
            elif 'gm' in products[0]:
                collection = 'c2'
            elif 'sr' in products[0]:
                collection = 'c1'
            elif 's2' in products[0]:
                collection = 's2'

            if len(ds.time.values) > 1:

                if reduce_func in ['mean', 'median', 'std', 'max', 'min']:
                    with HiddenPrints():
                        data = calculate_indices(ds,
                                                 index=calc_indices,
                                                 drop=drop,
                                                 collection=collection)
                        # getattr is equivalent to calling data.reduce_func
                        method_to_call = getattr(data, reduce_func)
                        data = method_to_call(dim='time')

                elif reduce_func == 'geomedian':
                    data = GeoMedian().compute(ds)
                    with HiddenPrints():
                        data = calculate_indices(data,
                                                 index=calc_indices,
                                                 drop=drop,
                                                 collection=collection)

                else:
                    raise Exception(
                        reduce_func + " is not one of the supported" +
                        " reduce functions ('mean','median','std','max','min', 'geomedian')"
                    )

            else:
                with HiddenPrints():
                    data = calculate_indices(ds,
                                             index=calc_indices,
                                             drop=drop,
                                             collection=collection)

        # when band indices are not required, reduce the
        # dataset to a 2d array through means or (geo)medians
        if calc_indices is None:

            if len(ds.time.values) > 1:

                if reduce_func == 'geomedian':
                    data = GeoMedian().compute(ds)

                elif reduce_func in ['mean', 'median', 'std', 'max', 'min']:
                    method_to_call = getattr(ds, reduce_func)
                    data = method_to_call('time')
            else:
                data = ds.squeeze()

    if zonal_stats is None:
        # If no zonal stats were requested then extract all pixel values
        flat_train = sklearn_flatten(data)
        # Make a labelled array of identical size
        flat_val = np.repeat(row[field], flat_train.shape[0])
        stacked = np.hstack((np.expand_dims(flat_val, axis=1), flat_train))

    elif zonal_stats in ['mean', 'median', 'std', 'max', 'min']:
        method_to_call = getattr(data, zonal_stats)
        flat_train = method_to_call()
        flat_train = flat_train.to_array()
        stacked = np.hstack((row[field], flat_train))

    else:
        raise Exception(
            zonal_stats + " is not one of the supported" +
            " reduce functions ('mean','median','std','max','min')")

    # Append training data and labels to list
    out_arrs.append(stacked)
    out_vars.append([field] + list(data.data_vars))
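A usage sketch for the function above; the shapefile, field name and query values are placeholders:

import geopandas as gpd

gdf = gpd.read_file('training_polygons.shp')             # hypothetical labelled polygons
out_arrs, out_vars = [], []
query = {'time': ('2019-01', '2019-12'), 'resolution': (-30, 30)}

for index, row in gdf.iterrows():
    get_training_data_for_shp(gdf, index, row,
                              out_arrs, out_vars,
                              products=['ga_ls8c_gm_2_annual'],
                              dc_query=query,
                              field='class',              # hypothetical label column
                              calc_indices=['NDVI'],
                              reduce_func='median',
                              zonal_stats='mean')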
Example #18
train_kfold = REFdata[REFdata['1ha']==1]
train_kfold = train_kfold[train_kfold[countCluster]>= Nplots]
train_kfold = train_kfold[train_kfold[DV] < maxValueplots]
print(f'kfold dataset: {train_kfold.shape}');


## Generate training data from the clusters (conglomerados)
salidas=[]
training_labels_all=[]
training_samples_all=np.array([], dtype=np.int64).reshape(0,7)
for i in range(len(train_kfold.Cha_HD)):
    print(f'Running conglom {i+1}')
    try:
        a = json.loads(train_kfold.iloc[i]['.geo'])

        geom = geometry.Geometry(a, crs=CRS('EPSG:4326'))

        dc = datacube.Datacube(app="Cana")
        """
        ALOS = dc.load(
            product='ALOS2_PALSAR_MOSAIC',
            geopolygon=geom,
        )

        ALOS=ALOS.isel(time=0)
        ALOS
        """
        #for i in range(30):
        xarr_0 = dc.load(
            product='LS7_ETM_LEDAPS_MOSAIC',
            time=("2016-01-01", "2016-12-31"),
Example #19
def generate_wb_timeseries(shapes, config_dict):
    """
    This is where the code processing is actually done. This code takes in a
    polygon and a config dict which contains: the shapefile's crs, output
    directory, id_field, time_span, and include_uncertainty, which says whether
    to include all data as well as an invalid pixel count that can be used
    for measuring uncertainty. It performs a polygon drill into the wofs_albers
    product. The resulting xarray, which contains the water classified pixels
    for that polygon over every available timestep, is used to calculate the
    percentage of the water body that is wet at each time step. The outputs
    are written to a csv file named using the polygon UID, which is a geohash
    of the polygon's centre coords.

    Inputs:
    shapes - polygon to be interrogated
    config_dict - many config settings including crs, id_field, time_span,
                  shapefile

    Outputs:
    Nothing is returned from the function, but a csv file is written out to
        disk
    """
    output_dir = config_dict['output_dir']
    crs = config_dict['crs']
    id_field = config_dict['id_field']
    time_span = config_dict['time_span']
    include_uncertainty = config_dict['include_uncertainty']

    if include_uncertainty:
        unknown_percent_threshold = 100
    else:
        unknown_percent_threshold = 10

    with Datacube(app='Polygon drill') as dc:
        first_geometry = shapes['geometry']

        str_poly_name = shapes['properties'][id_field]

        try:
            fpath = os.path.join(output_dir,
                                 f'{str_poly_name[0:4]}/{str_poly_name}.csv')
        except TypeError:
            str_poly_name = str(int(str_poly_name)).zfill(6)
            fpath = os.path.join(output_dir,
                                 f'{str_poly_name[0:4]}/{str_poly_name}.csv')
        geom = geometry.Geometry(first_geometry, crs=crs)
        current_year = datetime.now().year

        if time_span == 'ALL':
            if shapely_geom.shape(first_geometry).envelope.area > 2000000:
                years = range(1986, current_year + 1, 5)
                time_periods = [(str(year), str(year + 4)) for year in years]
            else:
                time_periods = [('1986', str(current_year))]
        elif time_span == 'APPEND':
            start_date = get_last_date(fpath)
            if start_date is None:
                print(f'There is no csv for {str_poly_name}')
                return 1
            time_periods = [(start_date, str(current_year))]
        elif time_span == 'CUSTOM':
            time_periods = [(config_dict['start_dt'], config_dict['end_date'])]

        valid_capacity_pc = []
        valid_capacity_ct = []
        invalid_capacity_ct = []
        date_list = []
        for time in time_periods:
            wb_capacity_pc = []
            wb_capacity_ct = []
            wb_invalid_ct = []
            dry_observed = []
            invalid_observations = []

            # Set up the query, and load in all of the WOFS layers
            query = {'geopolygon': geom, 'time': time}
            wofl = dc.load(product='wofs_albers',
                           group_by='solar_day',
                           fuse_func=wofls_fuser,
                           **query)

            if len(wofl.attrs) == 0:
                print(f'There is no new data for {str_poly_name}')
                return 2
            # Make a mask based on the polygon (to remove extra data
            # outside of the polygon)
            mask = rasterio.features.geometry_mask(
                [geom.to_crs(wofl.geobox.crs)],
                out_shape=wofl.geobox.shape,
                transform=wofl.geobox.affine,
                all_touched=False,
                invert=True)
            # mask the data to the shape of the polygon
            # the geometry width and height must both be larger than one pixel
            # to mask.
            if (geom.boundingbox.width > 25.3
                    and geom.boundingbox.height > 25.3):
                wofl_masked = wofl.water.where(mask)
            else:
                wofl_masked = wofl.water

            # Work out how full the waterbody is at every time step
            for ix, times in enumerate(wofl.time):

                # Grab the data for our timestep
                all_the_bit_flags = wofl_masked.isel(time=ix)

                # Find all the wet/dry pixels for that timestep
                lsa_wet = all_the_bit_flags.where(
                    all_the_bit_flags == 136).count().item()
                lsa_dry = all_the_bit_flags.where(
                    all_the_bit_flags == 8).count().item()
                sea_wet = all_the_bit_flags.where(
                    all_the_bit_flags == 132).count().item()
                sea_dry = all_the_bit_flags.where(
                    all_the_bit_flags == 4).count().item()
                sea_lsa_wet = all_the_bit_flags.where(
                    all_the_bit_flags == 140).count().item()
                sea_lsa_dry = all_the_bit_flags.where(
                    all_the_bit_flags == 12).count().item()
                wet_pixels = (all_the_bit_flags.where(
                    all_the_bit_flags == 128).count().item() + lsa_wet +
                              sea_wet + sea_lsa_wet)
                dry_pixels = (all_the_bit_flags.where(
                    all_the_bit_flags == 0).count().item() + lsa_dry +
                              sea_dry + sea_lsa_dry)

                # Count the number of masked observations
                masked_all = all_the_bit_flags.count().item()
                # Turn our counts into percents
                try:
                    water_percent = round((wet_pixels / masked_all * 100), 1)
                    dry_percent = round((dry_pixels / masked_all * 100), 1)
                    missing_pixels = masked_all - (wet_pixels + dry_pixels)
                    unknown_percent = missing_pixels / masked_all * 100

                except ZeroDivisionError:
                    water_percent = 0.0
                    dry_percent = 0.0
                    unknown_percent = 100.0
                    missing_pixels = masked_all
                    print(f'{str_poly_name} has divide by zero error')

                # Append the percentages to a list for each timestep
                # Filter out timesteps with < 90% valid observations. Add
                # empty values for timesteps with < 90% valid. if you set
                # 'UNCERTAINTY = True' in your config file then you will
                # only filter out timesteps with 100% invalid pixels.
                # You will also record the number invalid pixels per timestep.

                if unknown_percent < unknown_percent_threshold:
                    wb_capacity_pc.append(water_percent)
                    invalid_observations.append(unknown_percent)
                    wb_invalid_ct.append(missing_pixels)
                    dry_observed.append(dry_percent)
                    wb_capacity_ct.append(wet_pixels)
                else:
                    wb_capacity_pc.append('')
                    invalid_observations.append('')
                    wb_invalid_ct.append('')
                    dry_observed.append('')
                    wb_capacity_ct.append('')

            valid_obs = wofl.time.dropna(dim='time')
            valid_obs = valid_obs.to_dataframe()
            if 'spatial_ref' in valid_obs.columns:
                valid_obs = valid_obs.drop(columns=['spatial_ref'])
            valid_capacity_pc += wb_capacity_pc
            valid_capacity_ct += wb_capacity_ct
            invalid_capacity_ct += wb_invalid_ct
            date_list += valid_obs.to_csv(
                None,
                header=False,
                index=False,
                date_format="%Y-%m-%dT%H:%M:%SZ").split('\n')
            date_list.pop()

        if date_list:
            if include_uncertainty:
                rows = zip(date_list, valid_capacity_pc, valid_capacity_ct,
                           invalid_capacity_ct)
            else:
                rows = zip(date_list, valid_capacity_pc, valid_capacity_ct)
            os.makedirs(os.path.dirname(fpath), exist_ok=True)
            if time_span == 'APPEND':
                with open(fpath, 'a') as f:
                    writer = csv.writer(f)
                    for row in rows:
                        writer.writerow(row)
            else:
                with open(fpath, 'w') as f:
                    writer = csv.writer(f)
                    headings = [
                        'Observation Date', 'Wet pixel percentage',
                        'Wet pixel count (n = {0})'.format(masked_all)
                    ]
                    if include_uncertainty:
                        headings.append('Invalid pixel count')
                    writer.writerow(headings)
                    for row in rows:
                        writer.writerow(row)
        else:
            print(f'{str_poly_name} has no new good valid data')
        return True
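A usage sketch; the config keys follow the docstring above, and the shapefile path, CRS and id_field values are placeholders:

import fiona
from datacube.utils import geometry

config_dict = {
    'output_dir': 'waterbody_timeseries',
    'crs': geometry.CRS('EPSG:3577'),     # placeholder: use the shapefile's CRS
    'id_field': 'UID',
    'time_span': 'ALL',
    'include_uncertainty': False,
}

with fiona.open('waterbodies.shp') as shapes:            # hypothetical shapefile
    for shape in shapes:
        generate_wb_timeseries(shape, config_dict)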
Example #20
def get_training_data_for_shp(polygons,
                              out,
                              products,
                              dc_query,
                              field=None,
                              calc_indices=None,
                              reduce_func='median',
                              drop=True,
                              zonal_stats=None,
                              collection='c1'):
    """
    Function to extract data for training a classifier using a shapefile 
    of labelled polygons.

    Parameters
    ----------
    polygons : geopandas geodataframe
        polygon data in the form of a geopandas geodataframe
    out : list
        Empty list to contain output data.
    products : list
        a list of products to load from the datacube.
        e.g. ['ls8_usgs_sr_scene', 'ls7_usgs_sr_scene']
    dc_query : dictionary
        Datacube query object, should not contain lat and long (x or y)
        variables as these are supplied by the 'polygons' variable
    field : string 
        A string containing name of column with labels in shapefile 
        attribute table. Field must contain numeric values.
    calc_indices: list, optional
        An optional list giving the names of any remote sensing indices 
        to be calculated on the loaded data (e.g. `['NDWI', 'NDVI']`).
    reduce_func : string, optional
        Function to reduce the data from multiple time steps to
        a single timestep. Options are 'mean', 'median', 'std', or 'geomedian'.
    drop : boolean, optional
        If this variable is set to True, and 'calc_indices' are supplied, the
        spectral bands will be dropped from the dataset leaving only the
        band indices as data variables in the dataset. Default is True.
    zonal_stats: string, optional
        An optional string giving the names of zonal statistics to calculate 
        for the polygon. Default is None (all pixel values). Supported 
        values are 'mean' or 'median' 
    collection: string, optional
        to calculate band indices, the satellite collection is required.
        Options include 'c1' for Landsat C1, 'c2' for Landsat C2, and 
        's2' for Sentinel 2.

    Returns
    --------
    A list of numpy.arrays containing classes and extracted data for 
    each pixel or polygon.

    """
    #prevent function altering dictionary kwargs
    dc_query = deepcopy(dc_query)
    dc = datacube.Datacube(app='training_data')

    #set up some print statements
    i = 0
    if calc_indices is not None:
        print("Calculating indices: " + str(calc_indices))
    if reduce_func is not None:
        print("Reducing data using: " + reduce_func)
    if zonal_stats is not None:
        print("Taking zonal statistic: " + zonal_stats)

    # loop through polys and extract training data
    for index, row in polygons.iterrows():
        print(" Feature {:04}/{:04}\r".format(i + 1, len(polygons)), end='')

        # set up query based on polygon (convert to WGS84)
        geom = geometry.Geometry(polygons.geometry.values[index].__geo_interface__,
                                 geometry.CRS('epsg:4326'))

        q = {"geopolygon": geom}

        # merge polygon query with user supplied query params
        dc_query.update(q)

        # Identify the most common projection system in the input query
        output_crs = mostcommon_crs(dc=dc, product=products, query=dc_query)

        #load_ard doesn't handle geomedians
        if 'ga_ls8c_gm_2_annual' in products:
            ds = dc.load(product='ga_ls8c_gm_2_annual', **dc_query)

        else:
            # load data
            with HiddenPrints():
                ds = load_ard(dc=dc,
                              products=products,
                              output_crs=output_crs,
                              **dc_query)

        # create polygon mask
        mask = rasterio.features.geometry_mask(
            [geom.to_crs(ds.geobox.crs)],
            out_shape=ds.geobox.shape,
            transform=ds.geobox.affine,
            all_touched=False,
            invert=False)

        mask = xr.DataArray(mask, dims=("y", "x"))
        ds = ds.where(mask == False)

        # Check if band indices are wanted
        if calc_indices is not None:

            if len(ds.time.values) > 1:

                if reduce_func == 'geomedian':
                    data = GeoMedian().compute(ds)
                    with HiddenPrints():
                        data = calculate_indices(data,
                                                 index=calc_indices,
                                                 drop=drop,
                                                 collection=collection)

                elif reduce_func == 'std':
                    with HiddenPrints():
                        data = calculate_indices(ds,
                                                 index=calc_indices,
                                                 drop=drop,
                                                 collection=collection)
                    data = data.std('time')

                elif reduce_func == 'mean':
                    with HiddenPrints():
                        data = calculate_indices(ds,
                                                 index=calc_indices,
                                                 drop=drop,
                                                 collection=collection)

                    data = data.mean('time')

                elif reduce_func == 'median':
                    with HiddenPrints():
                        data = calculate_indices(ds,
                                                 index=calc_indices,
                                                 drop=drop,
                                                 collection=collection)

                    data = data.median('time')
            else:
                with HiddenPrints():
                    data = calculate_indices(ds,
                                             index=calc_indices,
                                             drop=drop,
                                             collection=collection)

        # when band indices are not required, reduce the
        # dataset to a 2d array through means or (geo)medians
        if calc_indices is None:
            if (len(ds.time.values) > 1) and (reduce_func is None):
                raise ValueError(
                    "Your dataset has " + str(len(ds.time.values)) +
                    " time-steps, please provide a reduction function, e.g. reduce_func='mean'"
                )

            if len(ds.time.values) > 1:
                if reduce_func == 'geomedian':
                    data = GeoMedian().compute(ds)

                if reduce_func == 'mean':
                    data = ds.mean('time')

                if reduce_func == 'std':
                    data = ds.std('time')

                if reduce_func == 'median':
                    data = ds.median('time')

            else:
                data = ds.squeeze()

        # compute in case we have dask arrays
        data = data.compute()

        if zonal_stats is None:
            # If no summary stats were requested then extract all pixel values
            flat_train = sklearn_flatten(data)
            # Make a labelled array of identical size
            flat_val = np.repeat(row[field], flat_train.shape[0])
            stacked = np.hstack((np.expand_dims(flat_val, axis=1), flat_train))

        elif zonal_stats == 'mean':
            flat_train = data.mean(axis=None, skipna=True)
            flat_train = flat_train.to_array()
            stacked = np.hstack((row[field], flat_train))

        elif zonal_stats == 'median':
            flat_train = data.median(axis=None, skipna=True)
            flat_train = flat_train.to_array()
            stacked = np.hstack((row[field], flat_train))

        # Append training data and label to list
        out.append(stacked)
        i += 1
    # Return a list of labels for columns in output array

    return [field] + list(data.data_vars)
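A usage sketch for the function above; paths, field name and query values are placeholders, and the returned list gives the column labels for the arrays stacked into 'out':

import geopandas as gpd

polygons = gpd.read_file('training_polygons.shp')        # hypothetical labelled polygons
out = []
column_names = get_training_data_for_shp(polygons, out,
                                          products=['ls8_usgs_sr_scene'],
                                          dc_query={'time': ('2018-01', '2018-12'),
                                                    'resolution': (-30, 30)},
                                          field='class',
                                          calc_indices=['NDVI'],
                                          reduce_func='median',
                                          zonal_stats='mean',
                                          collection='c1')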
Example #21
def rasterize_points(
    config=None,
    emission_types={
        "Fuel Consumption [kg]": "Fuel",
        "NOx [kg]": "NOX",
        "CO2 [kg]": "CO2"
    },
    #resolution=(-0.03, 0.05),
    #bbox=[-4, 50, 25, 65],
):
    """
    """
    if config is None:
        with open("config.json") as file:
            config = json.load(file)

    resolution = config["resolution"]
    bbox = config["bounding_box"]

    datapath = os.path.join(
        os.path.expanduser("~"),
        config["intermediate_data"],
        "ship_emissions",
    )

    filepaths = [os.path.join(
        datapath,
        i,
    ) for i in os.listdir(datapath)]
    filepaths.sort()

    # path to store data
    result_data = os.path.join(os.path.expanduser("~"), config["result_data"])

    if not os.path.exists(result_data):
        os.makedirs(result_data)

    # reproject to geo dataframe right LCC
    crs = "epsg:4326"  # LCC "+proj=lcc +lat_1=30 +lat_2=60 +lat_0=55 +lon_0=10 +y_0=1e+06 +x_0=1275000 +a=6370997 +b=6370997 +units=km +no_defs"

    bounding_box = box(bbox[0], bbox[1], bbox[2], bbox[3])

    json_box = mapping(bounding_box)  # minx miny maxx maxy

    json_box["crs"] = {"properties": {"name": crs}}

    geopoly = geometry.Geometry(
        json_box,
        crs=crs,
    )
    geobox = geometry.GeoBox.from_geopolygon(
        geopoly,
        resolution,
        crs=crs,
    )  # resolution y,x

    # geobox.xr_coords() # also get coords as xarrays from geobox
    coords = affine_to_coords(geobox.affine, geobox.width, geobox.height)

    for emission_type in emission_types.keys():
        emissions_per_day = {}
        dates = []
        for file in filepaths:
            df = pd.read_csv(file, index_col=[0],
                             parse_dates=True)  # , nrows=1000000)

            geodf = gpd.GeoDataFrame(
                df,
                crs="epsg:4326",
                geometry=gpd.points_from_xy(df.lon, df.lat),
            )

            if "lcc" in crs:
                geodf = geodf.to_crs(crs)

            arr = rasterize(
                zip(
                    geodf.geometry.apply(mapping).values,
                    geodf[emission_type],
                ),  # pairs of (geometry, emission value) for rasterize
                out_shape=(
                    geobox.height,
                    geobox.width,
                ),
                transform=geobox.affine,
                merge_alg=MergeAlg.add,
                all_touched=True,
            )

            date = df.index[
                0].dayofyear  # df.index.date[0].strftime("%Y-%m-%d")
            dates.append(date)
            emissions_per_day[date] = arr

        da = xr.DataArray(
            [i for i in emissions_per_day.values()],
            dims=[
                "time",
                "lat",
                "lon",
            ],
            coords=[
                np.array(dates),
                coords["y"],
                coords["x"],
            ],
        )

        da = da.rename("sum")
        da = da.astype("float64")
        da.attrs = {"units": "kg d-1"}

        da.coords["time"].attrs = {
            "standard_name": "time",
            "calendar": "proleptic_gregorian",
            "units": "days since 2015-01-01",
            "axis": "T",
        }
        da.coords["lon"].attrs = {
            "standard_name": "longnitude",
            "long_name": "longnitude",
            "units": "degrees_east",
            "axis": "X",
        }
        da.coords["lat"].attrs = {
            "standard_name": "latitude",
            "long_name": "latitude",
            "units": "degrees_north",
            "axis": "Y",
        }

        da.to_netcdf(
            os.path.join(result_data, emission_types[emission_type] +
                         ".nc"),  # write to shorter file name
            encoding={
                "lat": {
                    "dtype": "float32"
                },
                "lon": {
                    "dtype": "float32"
                },
                "sum": {
                    "dtype": "float32"
                },
            },
        )
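A sketch of the config this function expects (the same keys it reads from config.json); the concrete values below are assumptions, with resolution and bounding box taken from the commented-out defaults above:

config = {
    "resolution": (-0.03, 0.05),                # (y, x) grid resolution
    "bounding_box": [-4, 50, 25, 65],           # minx, miny, maxx, maxy
    "intermediate_data": "data/intermediate",   # relative to the home directory
    "result_data": "data/results",
}
rasterize_points(config=config)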
Example #22
def WIT_drill(gdf_poly,
              time,
              min_gooddata=0.80,
              TCW_threshold=-6000,
              export_csv=None,
              dask_chunks=None):
    """
    The Wetlands Insight Tool. This function loads FC, WOfS, Landsat-ARD,
    and calculates tasseled cap wetness, in order to determine the dominant
    land cover class within a polygon at each satellite observation.

    The output is a pandas dataframe containing a timeseries of the relative
    fractions of each class at each time-step. This forms the input to produce
    a stacked line-plot.

    Last modified: Feb 2020

    Parameters
    ----------  
    gdf_poly : geopandas.GeoDataFrame
        The dataframe must only contain a single row,
        containing the polygon you wish to interrogate.
    time : tuple
        a tuple containing the time range over which to run the WIT.
        e.g. ('2015-01' , '2019-12')
    min_gooddata : Float, optional
        A number between 0 and 1 (e.g 0.8) indicating the minimum percentage
        of good quality pixels required for a satellite observation to be loaded
        and therefore included in the WIT plot.  Defaults to 0.8, which should
        be considered a minimum percentage.
    TCW_threshold : Int, optional
        The tasseled cap wetness threshold, beyond which a pixel will be 
        considered 'wet'. Defaults to -6000. Consider the surface reflectance
        scaling of the Landsat product when adjusting this (C2 = 1-65,535) 
    export_csv : str, optional
        To export the returned pandas dataframe provide
        a location string (e.g. 'output/results.csv')
    dask_chunks : dict, optional
        To lazily load the datasets using dask, pass a dictionary containing
        the dimensions over which to chunk e.g. {'time':-1, 'x':250, 'y':250}.
        The function is not currently set up to handle dask arrays very well, so
        memory efficiency using dask will be of limited use here.
        
    Returns
    -------
    PolyDrill_df : Pandas.Dataframe
        A pandas dataframe containing the timeseries of relative fractions
        of each land cover class (WOfs, FC, TCW) 

    """

    print("working on polygon: " +
          str(gdf_poly.drop('geometry', axis=1).values) + ".  ")

    # make query from polygon
    geom = geometry.Geometry(gdf_poly.geometry.values[0].__geo_interface__,
                             geometry.CRS("epsg:4326"))
    query = {"geopolygon": geom, "time": time}

    # set Sandbox configs to load COG's faster
    datacube.utils.rio.set_default_rio_config(aws="auto", cloud_defaults=True)

    # Create a datacube instance
    dc = datacube.Datacube(app="wetlands insight tool")

    # find UTM crs for location
    crs = deafrica_datahandling.mostcommon_crs(dc=dc,
                                               product="usgs_ls8c_level2_2",
                                               query=query)

    # load landsat 5,7,8 data
    ls578_ds = deafrica_datahandling.load_ard(
        dc=dc,
        products=["usgs_ls8c_level2_2"],
        output_crs=crs,
        min_gooddata=min_gooddata,
        measurements=["red", "green", "blue", "nir", "swir_1", "swir_2"],
        align=(15, 15),
        dask_chunks=dask_chunks,
        group_by='solar_day',
        resolution=(-30, 30),
        **query,
    )

    # mask the data with our original polygon to remove extra data
    data = ls578_ds
    mask = rasterio.features.geometry_mask(
        [geom.to_crs(data.geobox.crs)],
        out_shape=data.geobox.shape,
        transform=data.geobox.affine,
        all_touched=False,
        invert=False,
    )

    # mask the data with the polygon
    mask_xr = xr.DataArray(mask, dims=("y", "x"))
    ls578_ds = data.where(mask_xr == False)
    print("size of wetlands array: " +
          str(ls578_ds.isel(time=1).red.values.shape))

    ls578_ds = ls578_ds.compute()

    # calculate tasselled cap wetness within masked AOI
    print("calculating tasseled cap index ")
    tci = thresholded_tasseled_cap(ls578_ds,
                                   wetness_threshold=TCW_threshold,
                                   drop=True,
                                   drop_tc_bands=True)
    # select only finite values (over threshold values)
    tcw = xr.ufuncs.isfinite(tci.wetness_thresholded)
    # #reapply the polygon mask
    tcw = tcw.where(mask_xr == False)

    print("Loading WOfS layers ")
    wofls = dc.load(
        product="ga_ls8c_wofs_2",
        like=ls578_ds,
        fuse_func=wofs_fuser,
        dask_chunks=dask_chunks,
    )
    wofls = wofls.where(wofls.time == tcw.time)
    # #reapply the polygon mask
    wofls = wofls.where(mask_xr == False)
    wofls = wofls.compute()

    wet_wofs = wofls.where(wofls.water == 128)

    # use bit values for wet (128) and terrain/low-angle (8)
    shadow_wofs = wofls.where(wofls.water == 136)
    # bit values for wet (128) and sea (4)
    sea_wofs = wofls.where(wofls.water == 132)
    # bit values for wet (128) and sea (4) and terrain/low-angle (8)
    sea_shadow_wofs = wofls.where(wofls.water == 140)

    # load Fractional cover
    print("Loading fractional Cover")
    # load fractional cover
    fc_ds = dc.load(
        product="ga_ls8c_fractional_cover_2",
        dask_chunks=dask_chunks,
        like=ls578_ds,
        measurements=["pv", "npv", "bs"],
    )
    # use landsat data set to cloud mask FC
    fc_ds = fc_ds.where(ls578_ds.red)

    # mask with polygon
    fc_ds = fc_ds.where(mask_xr == False)
    fc_ds = fc_ds.compute()

    fc_ds_noTCW = fc_ds.where(tcw == False)

    print("Generating classification")
    # match timesteps
    fc_ds_noTCW = fc_ds_noTCW.where(fc_ds_noTCW.time == tcw.time)

    # following robbi's advice, cast the dataset to a dataarray
    maxFC = fc_ds_noTCW.to_array(dim="variable", name="maxFC")

    # turn FC array into integer only as nanargmax doesn't seem to handle floats the way we want it to
    FC_int = maxFC.astype("int8")

    # use numpy.nanargmax to get the index of the maximum value along the variable dimension
    # BSPVNPV=np.nanargmax(FC_int, axis=0)
    BSPVNPV = FC_int.argmax(dim="variable")

    FC_mask = xr.ufuncs.isfinite(maxFC).all(dim="variable")

    # #re-mask with nans to remove no-data
    BSPVNPV = BSPVNPV.where(FC_mask)
    # restack the Fractional Cover dataset back together.
    # CAUTION: argmax depends on the order of variables in the dataset,
    # which differs between collections. Adjust the 0, 1, 2 indices below
    # to match the order of the FC variables in the dataset.
    FC_dominant = xr.Dataset({
        "BS": (BSPVNPV == 2).where(FC_mask),
        "PV": (BSPVNPV == 0).where(FC_mask),
        "NPV": (BSPVNPV == 1).where(FC_mask),
    })
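    # Sketch (not part of the original code): instead of hard-coding 0, 1, 2 the
    # band order could be read from the stacked array itself, e.g.
    #     band_index = {name: i for i, name in enumerate(maxFC["variable"].values)}
    #     FC_dominant = xr.Dataset({name.upper(): (BSPVNPV == band_index[name]).where(FC_mask)
    #                               for name in ["bs", "pv", "npv"]})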
    # count number of Fractional Cover pixels for each cover type in area of interest
    FC_count = FC_dominant.sum(dim=["x", "y"])

    # number of pixels in area of interest
    pixels = (mask_xr == 0).sum(dim=["x", "y"])

    # count number of tcw pixels
    tcw_pixel_count = tcw.sum(dim=["x", "y"])

    wofs_pixels = (wet_wofs.water.count(dim=["x", "y"]) +
                   shadow_wofs.water.count(dim=["x", "y"]) +
                   sea_wofs.water.count(dim=["x", "y"]) +
                   sea_shadow_wofs.water.count(dim=["x", "y"]))

    # count percentage of area of wofs
    wofs_area_percent = (wofs_pixels / pixels) * 100

    # calculate percentage area wet
    tcw_area_percent = (tcw_pixel_count / pixels) * 100

    # calculate wet not wofs
    tcw_less_wofs = tcw_area_percent - wofs_area_percent

    # Fractional cover pixel count method
    # Get number of FC pixels, divide by total number of pixels per polygon
    # Work out the number of nodata pixels in the data, so that we can graph the variables by number of observed pixels.
    Bare_soil_percent = (FC_count.BS / pixels) * 100
    Photosynthetic_veg_percent = (FC_count.PV / pixels) * 100
    NonPhotosynthetic_veg_percent = (FC_count.NPV / pixels) * 100
    NoData = (100 - wofs_area_percent - tcw_less_wofs -
              Photosynthetic_veg_percent - NonPhotosynthetic_veg_percent -
              Bare_soil_percent)
    NoDataPixels = (NoData / 100) * pixels

    # Fractional cover pixel count method
    # Get number of FC pixels, divide by total number of pixels per polygon
    Bare_soil_percent2 = (FC_count.BS / (pixels - NoDataPixels)) * 100
    Photosynthetic_veg_percent2 = (FC_count.PV / (pixels - NoDataPixels)) * 100
    NonPhotosynthetic_veg_percent2 = (FC_count.NPV /
                                      (pixels - NoDataPixels)) * 100

    # count percentage of area of wofs
    wofs_area_percent2 = (wofs_pixels / (pixels - NoDataPixels)) * 100
    # count number of tcw pixels
    tcw_pixel_count2 = tcw.sum(dim=["x", "y"])

    # calculate percentage area wet
    tcw_area_percent2 = (tcw_pixel_count2 / (pixels - NoDataPixels)) * 100

    # calculate wet not wofs
    tcw_less_wofs2 = tcw_area_percent2 - wofs_area_percent2

    # last check for timestep matching before we plot
    wofs_area_percent2 = wofs_area_percent2.where(
        wofs_area_percent2.time == Bare_soil_percent2.time)
    Bare_soil_percent2 = Bare_soil_percent2.where(
        Bare_soil_percent2.time == wofs_area_percent2.time)
    Photosynthetic_veg_percent2 = Photosynthetic_veg_percent2.where(
        Photosynthetic_veg_percent2.time == wofs_area_percent2.time)
    NonPhotosynthetic_veg_percent2 = NonPhotosynthetic_veg_percent2.where(
        NonPhotosynthetic_veg_percent2.time == wofs_area_percent2.time)

    # start setup of dataframe by adding only one dataset
    WOFS_df = pd.DataFrame(
        data=wofs_area_percent2.data,
        index=wofs_area_percent2.time.values,
        columns=["wofs_area_percent"],
    )

    # add data into pandas dataframe for export
    WOFS_df["wet_percent"] = tcw_less_wofs2.data
    WOFS_df["green_veg_percent"] = Photosynthetic_veg_percent2.data
    WOFS_df["dry_veg_percent"] = NonPhotosynthetic_veg_percent2.data
    WOFS_df["bare_soil_percent"] = Bare_soil_percent2.data

    # call the composite dataframe something sensible, like PolyDrill
    PolyDrill_df = WOFS_df.round(2)

    # save the csv of the output data used to create the stacked plot for the polygon drill
    if export_csv:
        print('exporting csv: ' + export_csv)
        PolyDrill_df.to_csv(export_csv, index_label="Datetime")

    ls578_ds = None
    data = None
    fc_ds = None
    wofls = None
    tci = None

    return PolyDrill_df
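
A minimal plotting sketch for the DataFrame returned above (not part of the original source; column names follow the WOFS_df assembled earlier, and PolyDrill_df stands in for the function's return value):

import matplotlib.pyplot as plt

cols = ["wofs_area_percent", "wet_percent", "green_veg_percent",
        "dry_veg_percent", "bare_soil_percent"]
PolyDrill_df[cols].plot.area(figsize=(12, 4), ylim=(0, 100))
plt.ylabel("Percent of polygon area")
plt.show()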
Example #23
0
def FindOutHowFullTheDamIs(shapes, crs):
    """
    This is where the code processing is actually done. This code takes in a polygon, and the
    shapefile's crs and performs a polygon drill into the wofs_albers product. The resulting
    xarray, which contains the water classified pixels for that polygon over every available
    timestep, is used to calculate the percentage of the water body that is wet at each time step.
    The outputs are written to a csv file named using the polygon ID.

    Inputs:
    shapes - polygon to be interrogated
    crs - crs of the shapefile

    Outputs:
    An integer status code: 1 if the polygon was processed (or had no sufficiently valid data),
    2 if no new data was found, 3 if something unexpected happened so the function can be run again.
    A csv file is written to disk for every valid polygon.
    """
    dc = Datacube(app='Polygon drill')
    first_geometry = shapes['geometry']
    if 'ID' in shapes['properties'].keys():
        polyName = shapes['properties']['ID']
    else:
        polyName = shapes['properties']['FID']

    strPolyName = str(polyName).zfill(6)
    fpath = os.path.join(output_dir, f'{strPolyName[0:4]}/{strPolyName}.csv')

    # start_date = get_last_date(fpath)
    start_date = '2021-05-01'
    if start_date is None:
        time_period = ('2021-03-01', current_time.strftime('%Y-%m-%d'))
        # print(f'There is no csv for {strPolyName}')
        # return 1
    else:
        time_period = ('2021-03-01', current_time.strftime('%Y-%m-%d'))

        geom = geometry.Geometry(first_geometry, crs=crs)

        ## Set up the query, and load in all of the WOFS layers
        query = {'geopolygon': geom, 'time': time_period}
        #         WOFL = dc.load(product='wofs_albers', **query)
        WOFL = dc.load(product='wofs_albers',
                       group_by='solar_day',
                       fuse_func=wofls_fuser,
                       **query)
        if len(WOFL.attrs) == 0:
            print(f'There is no new data for {strPolyName}')
            return 2
        # Make a mask based on the polygon (to remove extra data outside of the polygon)
        mask = rasterio.features.geometry_mask(
            [geom.to_crs(WOFL.geobox.crs)],
            out_shape=WOFL.geobox.shape,
            transform=WOFL.geobox.affine,
            all_touched=False,
            invert=True)
        wofl_masked = WOFL.water.where(mask)
        ## Work out how full the dam is at every time step
        DamCapacityPc = []
        DamCapacityCt = []
        LSA_WetPc = []
        DryObserved = []
        InvalidObservations = []
        for ix, times in enumerate(WOFL.time):

            # Grab the data for our timestep
            AllTheBitFlags = wofl_masked.isel(time=ix)

            # Find all the wet/dry pixels for that timestep
            LSA_Wet = AllTheBitFlags.where(
                AllTheBitFlags == 136).count().item()
            LSA_Dry = AllTheBitFlags.where(AllTheBitFlags == 8).count().item()
            WetPixels = AllTheBitFlags.where(
                AllTheBitFlags == 128).count().item() + LSA_Wet
            DryPixels = AllTheBitFlags.where(
                AllTheBitFlags == 0).count().item() + LSA_Dry
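            # (bit values: 128 = clear wet, 136 = wet + terrain/low-solar-angle
            #  shadow, 8 = dry + terrain/low-solar-angle shadow, 0 = clear dry)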

            # Apply the mask and count the number of observations
            MaskedAll = AllTheBitFlags.count().item()
            # Turn our counts into percents
            try:
                WaterPercent = WetPixels / MaskedAll * 100
                DryPercent = DryPixels / MaskedAll * 100
                UnknownPercent = (MaskedAll -
                                  (WetPixels + DryPixels)) / MaskedAll * 100
                LSA_WetPercent = LSA_Wet / MaskedAll * 100
            except ZeroDivisionError:
                WaterPercent = 0.0
                DryPercent = 0.0
                UnknownPercent = 100.0
                LSA_WetPercent = 0.0
            # Append the percentages to a list for each timestep
            DamCapacityPc.append(WaterPercent)
            InvalidObservations.append(UnknownPercent)
            DryObserved.append(DryPercent)
            DamCapacityCt.append(WetPixels)
            LSA_WetPc.append(LSA_WetPercent)

        ## Filter out timesteps with less than 90% valid observations
        try:
            ValidMask = [
                i for i, x in enumerate(InvalidObservations) if x < 10
            ]
            if len(ValidMask) > 0:
                ValidObs = WOFL.time[ValidMask].dropna(dim='time')
                ValidCapacityPc = [DamCapacityPc[i] for i in ValidMask]
                ValidCapacityCt = [DamCapacityCt[i] for i in ValidMask]
                ValidLSApc = [LSA_WetPc[i] for i in ValidMask]
                ValidObs = ValidObs.to_dataframe()
                if 'spatial_ref' in ValidObs.columns:
                    ValidObs = ValidObs.drop(columns=['spatial_ref'])

                DateList = ValidObs.to_csv(
                    None,
                    header=False,
                    index=False,
                    date_format="%Y-%m-%dT%H:%M:%SZ").split('\n')
                rows = zip(DateList, ValidCapacityPc, ValidCapacityCt,
                           ValidLSApc)

                if DateList:
                    strPolyName = str(polyName).zfill(6)
                    fpath = os.path.join(
                        output_dir, f'{strPolyName[0:4]}/{strPolyName}.csv')
                    os.makedirs(os.path.dirname(fpath), exist_ok=True)
                    with open(fpath, 'w') as f:
                        writer = csv.writer(f)
                        Headings = [
                            'Observation Date', 'Wet pixel percentage',
                            'Wet pixel count (n = {0})'.format(MaskedAll),
                            'LSA Wet Pixel Pct'
                        ]
                        writer.writerow(Headings)
                        for row in rows:
                            writer.writerow(row)
            else:
                print(f'{polyName} has no new data that is at least 90 percent valid')
            return 1
        except Exception:
            print(f"This polygon isn't working...: {polyName}")
            return 3
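
A hypothetical driver for the function above (not from the original project): it assumes fiona-style features that expose shape['geometry'] and shape['properties'], and that the module-level names the function relies on (output_dir, current_time, wofls_fuser) are already defined.

import fiona

# Hypothetical shapefile path; all names here are illustrative.
SHAPEFILE = 'waterbody_polygons.shp'

with fiona.open(SHAPEFILE) as shapes_file:
    crs = shapes_file.crs_wkt
    for shape in shapes_file:
        status = FindOutHowFullTheDamIs(shape, crs)
        if status != 1:
            print(f'Polygon returned status {status}; it may need re-running')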
Example #24
0
def analyze_parcel(_id, uid, coords, bbox):
    print("Starting analysis for: " + _id)

    time_range = ('2020-08-17', '2020-08-31')
    products = ['s2_l2a']
    measurements = ['red', 'green', 'blue', 'nir']
    resolution = [-10, 10]
    output_crs = 'EPSG:31700'
    attribute_col = 'id'

    data = {
        "type": "FeatureCollection",
        "bbox": bbox,
        "features": [{
            "type": "Feature",
            "geometry": {
                "type": "Polygon",
                "coordinates": [coords]
            }
        }]
    }

    filename = "/tmp/" + _id + ".geojson"
    with open(filename, "w") as tmpfile:
        json.dump(data, tmpfile, ensure_ascii=False, indent=4)

    gdf = gpd.read_file(filename)
    gdf['id'] = range(0, len(gdf))

    query = {
        'time': time_range,
        'measurements': measurements,
        'resolution': resolution,
        'output_crs': output_crs
    }

    # Dictionary to save results
    results = {}

    # Progress indicator
    i = 0

    # Loop through polygons in geodataframe and extract satellite data
    for index, row in gdf.iterrows():
        print(" Feature {:02}/{:02}\r".format(i + 1, len(gdf)), end='')

        # Get the geometry
        geom = geometry.Geometry(row.geometry.__geo_interface__,
                                 geometry.CRS(f'EPSG:{gdf.crs.to_epsg()}'))

        # Update dc query with geometry
        query.update({'geopolygon': geom})

        # Load Sentinel-2 data for the polygon
        ds = load_ard(dc=dc, products=products, group_by='solar_day', **query)

        # Generate a polygon mask to keep only data within the polygon:
        mask = xr_rasterize(gdf.iloc[[index]], ds)

        # Mask dataset to set pixels outside the polygon to `NaN`
        ds = ds.where(mask)

        # Append results to a dictionary using the attribute
        # column as a key
        results.update({str(row[attribute_col]): ds})

        # Update counter
        i += 1

    polygon_result = results['0']

    ndvi = calculate_indices(polygon_result, index='NDVI', collection='c1')
    ndwi = calculate_indices(polygon_result, index='NDWI', collection='c1')
    savi = calculate_indices(polygon_result, index='SAVI', collection='c1')

    ndvi_result = ndvi.NDVI.values
    ndwi_result = ndwi.NDWI.values
    savi_result = savi.SAVI.values

    t, h, w = ndvi_result.shape
    for timestep in range(0, t):
        ndvi_img = Image.fromarray(
            np.uint8(cm.get_cmap("YlGn")(ndvi_result[timestep]) * 255))

        filename = _id + str(timestep) + ".ndvi.png"
        ndvi_img.save(filename)
        bucket = 'ceres-analyzed-data'

        upload_file(filename, bucket)

        link = 'https://ceres-analyzed-data.s3.eu-central-1.amazonaws.com/' + filename
        date = '2020-08-15'

        payload = {"date": date, "link": link}

        url = f'http://parcel-manager-server:8080/parcels/{uid}/{_id}'

        requests.patch(url, json=payload)
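
A hypothetical invocation of analyze_parcel (coordinates, bbox and identifiers are illustrative; the function also expects dc, load_ard, xr_rasterize, calculate_indices and upload_file to be available at module level):

# A roughly 1 km square parcel in EPSG:4326 coordinates (illustrative values)
coords = [[26.10, 44.40], [26.11, 44.40], [26.11, 44.41],
          [26.10, 44.41], [26.10, 44.40]]
bbox = [26.10, 44.40, 26.11, 44.41]

analyze_parcel('parcel-0001', 'user-42', coords, bbox)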
Example #25
0
    def init_polygon(self):
        crs = self.crs
        crs = geometry.CRS(crs)
        first_geometry = {'type': 'Polygon', 'coordinates': eval(self.poly)}
        geom = geometry.Geometry(first_geometry, crs=crs)
        return geom
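
The eval() call above assumes self.poly holds a trusted Python literal. A safer sketch (not from the original class) parses the coordinate string with json.loads instead:

import json

from datacube.utils import geometry


def init_polygon_safe(crs_str, poly_str):
    # poly_str is assumed to be a JSON-encoded list of polygon rings, e.g.
    # '[[[146.0, -35.0], [146.1, -35.0], [146.1, -35.1], [146.0, -35.0]]]'
    first_geometry = {'type': 'Polygon', 'coordinates': json.loads(poly_str)}
    return geometry.Geometry(first_geometry, crs=geometry.CRS(crs_str))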
Example #26
0
def interval_uncertainty(polygon_id, item_polygon_path,
                         products=('ls5_pq_albers', 'ls7_pq_albers', 'ls8_pq_albers'),
                         time_period=('1986-01-01', '2017-01-01')):

    """
    This function uses the Digital Earth Australia archive to compute the standard deviation of tide heights for all
    Landsat observations that were used to generate the ITEM 2.0 composite layers and resulting tidal intervals. These
    standard deviations (one for each ITEM 2.0 interval) quantify the 'uncertainty' of each NIDEM elevation estimate:
    larger values indicate the ITEM interval was produced from a composite of images with a larger range of tide
    heights.

    Last modified: September 2018
    Author: Robbi Bishop-Taylor

    :param polygon_id:
        An integer giving the polygon ID of the desired ITEM v2.0 polygon to analyse.

    :param item_polygon_path:
        A string giving the path to the ITEM v2.0 polygon shapefile.

    :param products:
        An optional tuple of DEA Landsat product names used to calculate tide heights of all observations used
        to generate ITEM v2.0 tidal intervals. Defaults to ('ls5_pq_albers', 'ls7_pq_albers', 'ls8_pq_albers'),
        which loads Landsat 5, Landsat 7 and Landsat 8.

    :param time_period:
        An optional tuple giving the start and end date to analyse. Defaults to ('1986-01-01', '2017-01-01'), which
        analyses all Landsat observations from the start of 1986 to the end of 2016.

    :return:
        An array of shape (9,) giving the standard deviation of tidal heights for all Landsat observations used to
        produce each ITEM interval.

    """

    # Import tidal model data and extract geom and tide post
    item_gpd = gpd.read_file(item_polygon_path)
    lat, lon, poly = item_gpd[item_gpd.ID == int(polygon_id)][['lat', 'lon', 'geometry']].values[0]
    geom = geometry.Geometry(mapping(poly), crs=geometry.CRS(item_gpd.crs['init']))

    all_times_obs = list()

    # For each product:
    for source in products:

        # Use entire time range unless LS7
        time_range = ('1986-01-01', '2003-05-01') if source == 'ls7_pq_albers' else time_period

        # Determine matching datasets for geom area and group into solar day
        ds = dc.find_datasets(product=source, time=time_range, geopolygon=geom)
        group_by = query_group_by(group_by='solar_day')
        sources = dc.group_datasets(ds, group_by)

        # If data is found, add time to list then sort
        if len(ds) > 0:
            all_times_obs.extend(sources.time.data.astype('M8[s]').astype('O').tolist())

    # Calculate tide data from X-Y-time location
    all_times_obs = sorted(all_times_obs)
    tp_obs = [TimePoint(float(lon), float(lat), dt) for dt in all_times_obs]
    tides_obs = [tide.tide_m for tide in predict_tide(tp_obs)]

    # Convert to dataframe of observed dates and tidal heights
    df1_obs = pd.DataFrame({'Tide_height': tides_obs}, index=pd.DatetimeIndex(all_times_obs))


    ##################
    # ITEM intervals #
    ##################

    # Compute percentage tide height
    min_height = df1_obs.Tide_height.min()
    max_height = df1_obs.Tide_height.max()
    observed_range = max_height - min_height

    # Create dict of percentile values
    per10_dict = {perc + 1: min_height + observed_range * perc * 0.1 for perc in range(0, 10, 1)}

    # Bin each observation into an interval
    df1_obs['interval'] = pd.cut(df1_obs.Tide_height,
                                 bins=list(per10_dict.values()),
                                 labels=list(per10_dict.keys())[:-1])

    return df1_obs.groupby('interval').std().values.flatten()
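
A small, self-contained illustration (not from the original function) of how the decile binning above behaves: ten edges spanning the observed tide range yield nine labelled intervals, and observations above the ninth edge fall outside the bins.

import numpy as np
import pandas as pd

# Synthetic tide heights standing in for df1_obs.Tide_height
tides = pd.Series(np.linspace(-2.0, 2.0, 50))

min_height, max_height = tides.min(), tides.max()
observed_range = max_height - min_height

# Same construction as per10_dict above: edges at 0%, 10%, ..., 90% of the range
per10 = {perc + 1: min_height + observed_range * perc * 0.1 for perc in range(0, 10, 1)}

intervals = pd.cut(tides, bins=list(per10.values()), labels=list(per10.keys())[:-1])
print(intervals.value_counts().sort_index())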
Example #27
0
File: main.py Project: klh5/COLD
print("Loading data...")

if tile:
    
    grd_path = args.gridfile
        
    grd = gpd.read_file(grd_path)
    
    dc = datacube.Datacube()
        
    curr_poly = grd.where(grd.id == tile).dropna().iloc[0].geometry
    
    json_poly = json.loads(gpd.GeoSeries([curr_poly]).to_json())
    
    dc_geom = geometry.Geometry(json_poly['features'][0]['geometry'], geometry.CRS("EPSG:{}".format(args.epsg)))
    
    ds = getDataset(('1988-01-01', '2020-12-31'), dc_geom, args.epsg)
    
    dc.close()
    
else: 
    
    ds = xr.open_dataset(args.infile)

print("Setting up variables and output files...")

bands = list(ds.data_vars)

# Change-detection threshold based on the chi-square distribution
ch_thresh = chi2.ppf(0.99, df=len(bands))
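
For reference, a quick check of what this threshold evaluates to (illustrative; the degrees of freedom depend on how many bands the dataset above contains). In the COLD/CCDC family of algorithms the threshold is typically compared against the sum of squared, scaled residuals across the bands when testing for a change.

from scipy.stats import chi2

# With four bands (e.g. red, green, blue, nir) the 0.99 quantile is about 13.28
print(chi2.ppf(0.99, df=4))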