Example #1
def log_jaccard(im_id: str,
                cls: int,
                true_mask: np.ndarray,
                mask: np.ndarray,
                poly_mask: np.ndarray,
                true_poly: MultiPolygon,
                poly: MultiPolygon,
                valid_polygons=False):
    assert len(mask.shape) == 2
    pixel_jc = utils.mask_tp_fp_fn(mask, true_mask, 0.5)
    if valid_polygons:
        if not true_poly.is_valid:
            true_poly = utils.to_multipolygon(true_poly.buffer(0))
        if not poly.is_valid:
            poly = utils.to_multipolygon(poly.buffer(0))
        tp = true_poly.intersection(poly).area
        fn = true_poly.difference(poly).area
        fp = poly.difference(true_poly).area
        poly_jc = tp, fp, fn
    else:
        poly_jc = utils.mask_tp_fp_fn(poly_mask, true_mask, 0.5)
    logger.info(
        '{} cls-{} pixel jaccard: {:.5f}, polygon jaccard: {:.5f}'.format(
            im_id, cls, jaccard(pixel_jc), jaccard(poly_jc)))
    return pixel_jc, poly_jc
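The `jaccard` helper used in the log call above isn't shown in this example. A minimal sketch, assuming it computes intersection-over-union from the (tp, fp, fn) tuples produced above:

def jaccard(tp_fp_fn):
    # Hypothetical helper matching the calls above: IoU from a (tp, fp, fn) tuple.
    tp, fp, fn = tp_fp_fn
    denom = tp + fp + fn
    return tp / denom if denom else 0.0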
Example #2
def pixelwise_vector_f1(gt: List[Polygon],
                        pred: List[Polygon],
                        v: bool = True):
    """
    Measures pixelwise f1-score, but for vector representation instead of raster.

    :param gt: list of shapely Polygons representing the ground truth;
    :param pred: list of shapely Polygons representing the prediction;
    :param v: verbose flag; if True, a log string is accumulated
    :return: float f1-score and string log
    """
    log = ''
    gt_mp = MultiPolygon(gt)
    pred_mp = MultiPolygon(pred)

    # try making polygons valid
    gt_mp = gt_mp.buffer(0)
    pred_mp = pred_mp.buffer(0)

    tp = gt_mp.intersection(pred_mp).area
    fp = pred_mp.area - tp
    fn = gt_mp.area - tp

    if tp == 0:
        f1 = 0.
    else:
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * (precision * recall) / (precision + recall)
    if v:
        log += 'True Positive = ' + str(tp) + ', False Negative = ' + str(
            fn) + ', False Positive = ' + str(fp) + '\n'

    return f1, log
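A quick illustrative call, assuming the function and its shapely imports above are available. Two unit squares overlapping by half give precision = recall = 0.5:

from shapely.geometry import Polygon

gt = [Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])]
pred = [Polygon([(0.5, 0), (1.5, 0), (1.5, 1), (0.5, 1)])]
f1, log = pixelwise_vector_f1(gt, pred)
print(f1)  # 0.5: tp = fp = fn = 0.5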
Example #3
def get_geom_list(features, buffer_distance):
    ''' Creates a list of geometries based on an input geojson["features"].
    Requires shapely and the transform coordinate function.
    Returns a list with all the geometries appended.
    '''

    geom_list = []
    for feature_json in features:
        if feature_json["geometry"]["type"] == "MultiPolygon":
            polygons = []
            for part in feature_json["geometry"]["coordinates"]:

                if len(part) == 1:
                    polygons.append(Polygon(part[0]))
                else:
                    # remaining rings after the first are interior rings (holes)
                    polygons.append(Polygon(part[0], part[1:]))
            geom = MultiPolygon(polygons)
        elif feature_json["geometry"]["type"] == "MultiLineString":
            geom = MultiLineString([
                LineString(coord)
                for coord in feature_json["geometry"]["coordinates"]
            ])
        elif feature_json["geometry"]["type"] == "MultiPoint":
            geom = MultiPoint([
                Point(coord)
                for coord in feature_json["geometry"]["coordinates"]
            ])
        else:
            geom = asShape(feature_json["geometry"])
        if buffer_distance != 0:
            geom = geom.buffer(buffer_distance)
        geom_list.append(geom)
    print("Created  Geometry List")
    return geom_list
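An illustrative input for the function above, assuming the shapely constructors it uses are imported; the hypothetical features below exercise the MultiPolygon branch and the asShape fallback:

features = [
    {"geometry": {"type": "MultiPolygon",
                  "coordinates": [[[[0, 0], [2, 0], [2, 2], [0, 2], [0, 0]]]]}},
    {"geometry": {"type": "Point", "coordinates": [5.0, 5.0]}},
]
geoms = get_geom_list(features, buffer_distance=0.1)  # both geometries buffered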
Example #4
    def cache_mask(
        self,
        ogr_path: Path,
        layer_name: str,
        mask_name: str,
    ) -> None:
        """Cache given mask to disk."""
        mask_directory = self.directory / "mask" / mask_name
        mask_pickle = mask_directory / "mask.pkl"
        if mask_pickle.exists():
            print("Already cached!")
            return
        else:
            mask_directory.mkdir(parents=True, exist_ok=True)

        with fiona.open(ogr_path, "r", layer=layer_name) as src:
            mask = MultiPolygon(
                [vector.fiona_polygon(feature) for feature in track(src)]
            )
            print("Buffering multipolygon pickle... ", end="")
            mask = mask.buffer(0.0)
            print("Done!")

        print("Writing pickle file... ", end="")
        mask_pickle.write_bytes(
            pickle.dumps(mask, protocol=pickle.HIGHEST_PROTOCOL),
        )
        print("Done!")
Example #5
    def buildFilledContourLayer(self, polygons, asLayers=False):
        name = self.uOutputName.text()
        zField = self._zField
        zmin=zField+'_min'
        zmax=zField+'_max'
        vl = self.createVectorLayer("MultiPolygon", name, FILLED,
                                   [('index',int),
                                    (zmin,float),
                                    (zmax,float),
                                    ('label',str)
                                   ])
        pr = vl.dataProvider()
        fields = pr.fields()
        msg = list()
        symbols=[]
        ninvalid=0
        dx,dy=self._origin
        for i, level_min, level_max, polygon in polygons:
            level_min=float(level_min)
            level_max=float(level_max)
            levels = (
                self.formatLevel(level_min) + " - " +
                self.formatLevel(level_max) + self.uLabelUnits.text()
                )
            try:
                feat = QgsFeature(fields)
                try:
                    geom=MultiPolygon(polygon)
                    if not geom.is_valid:
                        # Try buffering to create a valid alternative for geometry
                        # Test area is not significantly altered

                        geom2=geom.buffer(0.0)
                        if geom2.area > 0.0 and abs(1-geom.area/geom2.area) < 0.000001:
                            geom=geom2
                        if not geom.is_valid:
                            ninvalid += 1
                    qgeom=QgsGeometry.fromWkt(geom.wkt)
                    qgeom.translate(dx,dy)
                    feat.setGeometry(qgeom)
                except Exception:
                    continue
                feat['index']=i
                feat[zmin]=level_min
                feat[zmax]=level_max
                feat['label']=levels
                pr.addFeatures( [ feat ] )
                symbols.append([level_min,levels])
            except Exception as ex:
                self.warnUser(str(ex))
                msg.append(str(levels))
        if len(msg) > 0:
            self.warnUser("Levels not represented : "+", ".join(msg),"Filled Contour issue")
        if ninvalid > 0:
            self.warnUser("Matplotlib contouring routine has created {0} invalid geometries"
                          .format(ninvalid))
        vl.updateExtents()
        vl.commitChanges()
        self.applyRenderer(vl,'polygon',zmin,symbols)
        return vl
Example #6
def process_shp(session, spno, shp):
	for feature in shp:
		try:
			props = feature['properties']
			# Convert property names to uppercase
			props = { key.upper(): props[key] for key in props }

			if spno != props['SPNO']:
				log.error('SPNO does not match %s != %s' % (spno, props['SPNO']))
				return

			if props['RNGE'] in (8,9): # TODO - investigate what these numbers mean
				return

			taxon_id = props['TAXONID']

			parts = _taxon_re.match(taxon_id)

			if parts is None:
				log.error("Invalid taxon id format: %s" % taxon_id)
				return

			prefix = parts.group(1)
			suffix = parts.group(2) or ''

			geometry = shape(feature['geometry'])
			if type(geometry) == Polygon:
				geometry = MultiPolygon([geometry])

			geometry = reproject(geometry, pyproj.Proj(shp.crs), pyproj.Proj('+init=EPSG:4326'))

			for s in suffix.split("."):
				taxon_exists = len(session.execute("SELECT 1 FROM taxon WHERE id = :id", { 'id': prefix + s }).fetchall()) > 0
				if taxon_exists:
					session.execute("""INSERT INTO taxon_range (taxon_id, range_id, breeding_range_id, geometry) VALUES
						(:taxon_id, :range_id, :breeding_range_id, ST_GeomFromWKB(_BINARY :geom_wkb))""", {
							'taxon_id': prefix + s,
							'range_id': props['RNGE'] or None,
							'breeding_range_id': props['BRRNGE'] or None,
							'geom_wkb': shapely.wkb.dumps(geometry)
						}
					)

					if insert_subdivided:
						for geom in subdivide_geometry(geometry.buffer(0)):
							geom = to_multipolygon(geom)
							if not geom.is_empty:
								session.execute("""INSERT INTO taxon_range_subdiv (taxon_id, range_id, breeding_range_id, geometry) VALUES
									(:taxon_id, :range_id, :breeding_range_id, ST_GeomFromWKB(_BINARY :geom_wkb))""", {
										'taxon_id': prefix + s,
										'range_id': props['RNGE'] or None,
										'breeding_range_id': props['BRRNGE'] or None,
										'geom_wkb': shapely.wkb.dumps(to_multipolygon(geom))
									}
								)

		except:
			log.error("Error processing row: %s" % props)
			raise
Example #7
def mask_to_polygons(mask,
                     epsilon=1,
                     min_area=1.,
                     engine='opencv',
                     buffer_amount=0.001):
    # __author__ = Konstantin Lopuhin
    # https://www.kaggle.com/lopuhin/dstl-satellite-imagery-feature-detection/full-pipeline-demo-poly-pixels-ml-poly

    # first, find contours with cv2: it's much faster than shapely
    if engine == 'opencv':
        # TODO: verify this workaround. Added >= 0.5 here to handle non-binarized (noisy) masks in the file
        image, contours, hierarchy = cv2.findContours(
            ((mask >= 0.5) * 255).astype(np.uint8), cv2.RETR_CCOMP,
            cv2.CHAIN_APPROX_TC89_KCOS)
        # create approximate contours to have reasonable submission size
        approx_contours = [
            cv2.approxPolyDP(cnt, epsilon, True) for cnt in contours
        ]
        if not contours:
            return MultiPolygon()
        # now messy stuff to associate parent and child contours
        cnt_children = defaultdict(list)
        child_contours = set()
        assert hierarchy.shape[0] == 1
        # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
        for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
            if parent_idx != -1:
                child_contours.add(idx)
                cnt_children[parent_idx].append(approx_contours[idx])
        # create actual polygons filtering by area (removes artifacts)
        all_polygons = []
        for idx, cnt in enumerate(approx_contours):
            if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
                assert cnt.shape[1] == 1
                poly = Polygon(shell=cnt[:, 0, :],
                               holes=[
                                   c[:, 0, :]
                                   for c in cnt_children.get(idx, [])
                                   if cv2.contourArea(c) >= min_area
                               ])
                all_polygons.append(poly)
                # approximating polygons might have created invalid ones, fix them
    else:
        all_polygons = []
        for shp, value in features.shapes(mask.astype(np.int16),
                                          mask=(mask == 1),
                                          transform=rasterio.Affine(
                                              1.0, 0, 0, 0, 1.0, 0)):
            all_polygons.append(shapely.geometry.shape(shp))

    all_polygons = MultiPolygon(all_polygons)
    if True:  # buffer unconditionally, not only when all_polygons is invalid
        all_polygons = all_polygons.buffer(buffer_amount)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
Example #8
def mask_to_polygons(mask, img_id, epsilon=1, min_area=1., test=True):
    """
    Generate polygons from mask
    :param mask:
    :param epsilon:
    :param min_area:
    :return:
    """
    # find contours; cv2 switches the x-y coordinates of the mask to y-x in contours.
    # This matches the wkt data in train_wkt_v4, which is desirable for submission
    image, contours, hierarchy = cv2.findContours(
        ((mask == 1) * 255).astype(np.uint8),
        cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)
    # create approximate contours
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True)
                       for cnt in contours]

    if not contours:
        return MultiPolygon()

    cnt_children = defaultdict(list)
    child_contours = set()

    assert hierarchy.shape[0] == 1

    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygon filtering by area (remove artifacts)
    all_polygons = []

    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            poly = Polygon(shell = cnt[:, 0, :],
                           holes = [c[:, 0, :] for c in cnt_children.get(idx, [])
                                    if cv2.contourArea(c) >= min_area])
            all_polygons.append(poly)
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])

    image_id = test_IDs_dict[img_id] if test else train_IDs_dict[img_id]

    x_max = grid_sizes[grid_sizes.ImageId == image_id].Xmax.values[0]
    y_min = grid_sizes[grid_sizes.ImageId == image_id].Ymin.values[0]
    x_scaler, y_scaler = x_max / mask.shape[1], y_min / mask.shape[0]

    scaled_pred_polygons = scale(all_polygons, xfact=x_scaler,
                                 yfact=y_scaler, origin=(0., 0., 0.))

    return scaled_pred_polygons
Example #9
    def mask_to_polygons(self,
                         mask,
                         epsilon=1,
                         min_area=2.,
                         max_area=100,
                         buffer_value=0.0001):
        """transform predicted mask to wkt format"""

        # first, find contours with cv2: it's much faster than shapely
        image, contours, hierarchy = cv2.findContours(
            ((mask == 1) * 255).astype(np.uint8), cv2.RETR_CCOMP,
            cv2.CHAIN_APPROX_TC89_KCOS)

        # create approximate contours to have reasonable submission size
        approx_contours = [
            cv2.approxPolyDP(cnt, epsilon, True) for cnt in contours
        ]

        if not contours:
            return MultiPolygon()

        # now messy stuff to associate parent and child contours
        cnt_children = defaultdict(list)
        child_contours = set()
        assert hierarchy.shape[0] == 1

        # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
        for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
            if parent_idx != -1:
                child_contours.add(idx)
                cnt_children[parent_idx].append(approx_contours[idx])

        # create actual polygons filtering by area (removes artifacts)
        all_polygons = []
        for idx, cnt in enumerate(approx_contours):
            if idx not in child_contours and cv2.contourArea(
                    cnt) >= min_area and cv2.contourArea(cnt) < max_area:
                assert cnt.shape[1] == 1
                poly = Polygon(shell=cnt[:, 0, :],
                               holes=[
                                   c[:, 0, :]
                                   for c in cnt_children.get(idx, [])
                                   if cv2.contourArea(c) >= min_area
                                   and cv2.contourArea(c) <= max_area
                               ])
                all_polygons.append(poly)

        # approximating polygons might have created invalid ones, fix them
        all_polygons = MultiPolygon(all_polygons)
        if not all_polygons.is_valid:
            all_polygons = all_polygons.buffer(buffer_value)

            # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
            # need to keep it a Multi throughout
            if all_polygons.type == 'Polygon':
                all_polygons = MultiPolygon([all_polygons])
        return all_polygons
Example #10
    def test_get_field_write_target(self):
        p1 = 'Polygon ((-116.94238466549290933 52.12861711455555991, -82.00526805089285176 61.59075286434307372, ' \
             '-59.92695130138864101 31.0207758265680269, -107.72286778108455962 22.0438778075388484, ' \
             '-122.76523743459291893 37.08624746104720771, -116.94238466549290933 52.12861711455555991))'
        p2 = 'Polygon ((-63.08099655131782413 21.31602121140134898, -42.70101185946779765 9.42769680782217279, ' \
             '-65.99242293586783603 9.912934538580501, -63.08099655131782413 21.31602121140134898))'
        p1 = wkt.loads(p1)
        p2 = wkt.loads(p2)

        mp1 = MultiPolygon([p1, p2])
        mp2 = mp1.buffer(0.1)
        geoms = [mp1, mp2]
        gvar = GeometryVariable(name='gc',
                                value=geoms,
                                dimensions='elementCount')
        gc = gvar.convert_to(node_dim_name='n_node')
        field = gc.parent
        self.assertEqual(field.grid.node_dim.name, 'n_node')

        actual = DriverESMFUnstruct._get_field_write_target_(field)
        self.assertEqual(field.grid.node_dim.name, 'n_node')
        self.assertNotEqual(id(field), id(actual))
        self.assertEqual(actual['numElementConn'].dtype, np.int32)
        self.assertEqual(actual['elementConn'].dtype, np.int32)
        self.assertNotIn(field.grid.cindex.name, actual)
        self.assertEqual(actual['nodeCoords'].dimensions[0].name, 'nodeCount')

        path = self.get_temporary_file_path('foo.nc')
        actual.write(path)

        # Optional test for loading the mesh file if ESMF is available.
        try:
            import ESMF
        except ImportError:
            pass
        else:
            _ = ESMF.Mesh(filename=path, filetype=ESMF.FileFormat.ESMFMESH)

        path2 = self.get_temporary_file_path('foo2.nc')
        driver = DriverKey.NETCDF_ESMF_UNSTRUCT
        field.write(path2, driver=driver)

        # Test the polygons are equivalent when read from the ESMF unstructured file.
        rd = ocgis.RequestDataset(path2, driver=driver)
        self.assertEqual(rd.driver.key, driver)
        efield = rd.get()
        self.assertEqual(efield.driver.key, driver)
        grid_actual = efield.grid
        self.assertEqual(efield.driver.key, driver)
        self.assertEqual(grid_actual.parent.driver.key, driver)
        self.assertEqual(grid_actual.x.ndim, 1)

        for g in grid_actual.archetype.iter_geometries():
            self.assertPolygonSimilar(g[1], geoms[g[0]])

        ngv = grid_actual.archetype.convert_to()
        self.assertIsInstance(ngv, GeometryVariable)
Example #11
def mask_to_polygons(mask, epsilon=5, min_area=.1, rect_polygon=False):
    horiz_axis = float(mask.shape[0] - 1) / 2
    vert_axis = float(mask.shape[1] - 1) / 2
    # first, find contours with cv2: it's much faster than shapely
    image, contours, hierarchy = cv2.findContours(
        ((mask == 1) * 255).astype(np.uint8), cv2.RETR_CCOMP,
        cv2.CHAIN_APPROX_TC89_KCOS)
    # create approximate contours to have reasonable submission size
    approx_contours = [
        cv2.approxPolyDP(cnt, epsilon, True) for cnt in contours
    ]
    if not contours:
        return MultiPolygon()
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            # flip y about the horizontal centre line (horiz_axis is the row centre)
            y_coord = cnt[:, 0, 1]
            cnt[:, 0, 1] = 2 * horiz_axis - y_coord
            if rect_polygon:
                cnt = cv2.boxPoints(
                    cv2.minAreaRect(cnt))  # rectangular polygons
                poly = Polygon(shell=cnt[:, :],
                               holes=[
                                   c[:, 0, :]
                                   for c in cnt_children.get(idx, [])
                                   if cv2.contourArea(c) >= min_area
                               ])
            else:
                poly = Polygon(shell=cnt[:, 0, :],
                               holes=[
                                   c[:, 0, :]
                                   for c in cnt_children.get(idx, [])
                                   if cv2.contourArea(c) >= min_area
                               ])
            all_polygons.append(poly)
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
Example #12
def get_intersections(offs, array_p, array_m, vfunc, gt, n_pixels):
    polys = []
    indices = []
    if len(array_p) > 0 and len(array_m) > 0:
        array = np.multiply(array_p, array_m)
        unique_values = np.unique(array)
        temp = np.zeros(array.shape, dtype=np.uint8)
        for u in unique_values:
            if u != 0:
                temp[array == u] = 1
                contours, hier = cv2.findContours(temp, cv2.RETR_CCOMP,
                                                  cv2.CHAIN_APPROX_TC89_L1)
                p = dict()
                for i, c in enumerate(contours):
                    if len(c) > 2 and Polygon(p[0] for p in c).area > n_pixels:
                        coords = vfunc(*zip(
                            np.array([[p[0][0] + offs[0],
                                       p[0][1] + offs[1]][::-1]
                                      for p in c]).T),
                                       gt=gt)
                        coords = np.array([coords[0][0], coords[1][0]])
                        if hier[0, i, 3] == -1:
                            p[i] = dict()
                            p[i]['exterior'] = coords
                        else:
                            if 'interior' in p[hier[0, i, 3]]:
                                p[hier[0, i, 3]]['interior'].append(coords)
                            else:
                                p[hier[0, i, 3]]['interior'] = [coords]
                if p:
                    pp = []
                    for key in p.keys():
                        pp.append(
                            Polygon(
                                np.array(p[key]['exterior']).T, [
                                    np.array(p[key]['interior'][i]).T
                                    for i in range(len(p[key]['interior']))
                                ] if 'interior' in p[key] else []))
                    if len(pp) > 1:
                        poly = MultiPolygon(pp)
                    else:
                        poly = pp[0]

                    if not poly.is_valid:
                        poly_b = poly.buffer(0)
                        if not poly_b.is_empty and poly_b.area > poly.area / 2:
                            polys.append(poly_b)
                        else:
                            polys.append(poly)
                    else:
                        polys.append(poly)

                    indices.append(u)
                temp = np.zeros(array.shape, dtype=np.uint8)
    return polys, indices
Example #13
    def test_get_field_write_target(self):
        p1 = 'Polygon ((-116.94238466549290933 52.12861711455555991, -82.00526805089285176 61.59075286434307372, ' \
             '-59.92695130138864101 31.0207758265680269, -107.72286778108455962 22.0438778075388484, ' \
             '-122.76523743459291893 37.08624746104720771, -116.94238466549290933 52.12861711455555991))'
        p2 = 'Polygon ((-63.08099655131782413 21.31602121140134898, -42.70101185946779765 9.42769680782217279, ' \
             '-65.99242293586783603 9.912934538580501, -63.08099655131782413 21.31602121140134898))'
        p1 = wkt.loads(p1)
        p2 = wkt.loads(p2)

        mp1 = MultiPolygon([p1, p2])
        mp2 = mp1.buffer(0.1)
        geoms = [mp1, mp2]
        gvar = GeometryVariable(name='gc', value=geoms, dimensions='elementCount')
        gc = gvar.convert_to(node_dim_name='n_node')
        field = gc.parent
        self.assertEqual(field.grid.node_dim.name, 'n_node')

        actual = DriverESMFUnstruct._get_field_write_target_(field)
        self.assertEqual(field.grid.node_dim.name, 'n_node')
        self.assertNotEqual(id(field), id(actual))
        self.assertEqual(actual['numElementConn'].dtype, np.int32)
        self.assertEqual(actual['elementConn'].dtype, np.int32)
        self.assertNotIn(field.grid.cindex.name, actual)
        self.assertEqual(actual['nodeCoords'].dimensions[0].name, 'nodeCount')

        path = self.get_temporary_file_path('foo.nc')
        actual.write(path)

        try:
            import ESMF
        except ImportError:
            pass
        else:
            _ = ESMF.Mesh(filename=path, filetype=ESMF.FileFormat.ESMFMESH)

        path2 = self.get_temporary_file_path('foo2.nc')
        driver = DriverKey.NETCDF_ESMF_UNSTRUCT
        field.write(path2, driver=driver)

        # Test the polygons are equivalent when read from the ESMF unstructured file.
        rd = ocgis.RequestDataset(path2, driver=driver)
        self.assertEqual(rd.driver.key, driver)
        efield = rd.get()
        self.assertEqual(efield.driver.key, driver)
        grid_actual = efield.grid
        self.assertEqual(efield.driver.key, driver)
        self.assertEqual(grid_actual.parent.driver.key, driver)
        self.assertEqual(grid_actual.x.ndim, 1)

        for g in grid_actual.archetype.iter_geometries():
            self.assertPolygonSimilar(g[1], geoms[g[0]])

        ngv = grid_actual.archetype.convert_to()
        self.assertIsInstance(ngv, GeometryVariable)
Example #14
    def multipolygon(self) -> MultiPolygon:
        triangles = self._grd.elements.triangulation.triangles

        triangle_edges, counts = numpy.unique(
            numpy.sort(
                numpy.concatenate(
                    [triangles[:, :2], triangles[:, 1:], triangles[:, [0, 2]]],
                    axis=0),
                axis=1,
            ),
            axis=0,
            return_counts=True,
        )
        boundary_edges = triangle_edges[counts == 1]

        boundary_edge_points = self._grd.nodes.iloc[:, :2].values[
            boundary_edges]

        exterior_polygons = collect_interiors(
            list(polygonize(boundary_edge_points.tolist())))

        coords = self._grd.nodes.values
        x = coords[:, 0]
        y = coords[:, 1]
        total_triangle_area = numpy.sum(
            numpy.abs((x[triangles[:, 0]] *
                       (y[triangles[:, 1]] - y[triangles[:, 2]]) +
                       x[triangles[:, 1]] *
                       (y[triangles[:, 2]] - y[triangles[:, 0]]) +
                       x[triangles[:, 2]] *
                       (y[triangles[:, 0]] - y[triangles[:, 1]])) / 2))

        if not numpy.isclose(exterior_polygons[-1].area, total_triangle_area):
            polygon_collection = []
            coords = self._grd.coords.values
            for rings in self.sorted().values():
                exterior = coords[rings['exterior'][:, 0], :]
                interiors = []
                for interior in rings['interiors']:
                    interiors.append(coords[interior[:, 0], :])
                polygon_collection.append(Polygon(exterior, interiors))

            exterior_polygons.extend(polygon_collection)
            exterior_polygons = collect_interiors(exterior_polygons)

        multipolygon = MultiPolygon(exterior_polygons)
        if not multipolygon.is_valid:
            try:
                multipolygon = multipolygon.buffer(0)
            except Exception as error:
                logging.exception(error)

        return multipolygon
Example #15
def geometry_from_feature_collection(feature_collection):
    polygons = []
    for feature in feature_collection['features']:
        geometry = feature['geometry']
        if geometry['type'] == 'Polygon':
            polygons.append(asShape(geometry))

    if polygons:
        mp = MultiPolygon(polygons)
        if not mp.is_valid:
            mp = mp.buffer(0)
        return mp
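An illustrative call with a minimal GeoJSON-like feature collection (a single triangle); non-Polygon geometries are ignored by the function above:

fc = {"features": [
    {"geometry": {"type": "Polygon",
                  "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 0]]]}},
    {"geometry": {"type": "Point", "coordinates": [9, 9]}},
]}
mp = geometry_from_feature_collection(fc)
print(mp.area)  # 0.5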
Example #17
def create_land_areas(polygon_shapefile,
                      extents_wktfile,
                      buffer=land_area_buffer,
                      tolerance=land_area_tolerance,
                      min_points=0,
                      verbose=False):
    areas = []
    driver = ogr.GetDriverByName('ESRI Shapefile')
    print("Loading land area definition from " + polygon_shapefile)
    datasource = driver.Open(polygon_shapefile, 0)
    if datasource is None:
        raise RuntimeError('Cannot open land areas file ' + polygon_shapefile)
    layer = datasource.GetLayer()
    npoints = 0
    nskip = 0
    areas = []
    for feature in layer:
        mp = wkb.loads(feature.GetGeometryRef().ExportToWkb())
        if type(mp) == Polygon:
            mp = [mp]
        for p in mp:
            npoints1 = len(p.exterior.coords)
            if min_points and len(p.exterior.coords) < min_points:
                nskip += 1
                continue
            p = Polygon(p.exterior)
            p = buffered_polygon(p, buffer, tolerance)
            npoints2 = len(p.exterior.coords)
            if type(p) == Polygon:
                p = [p]
            areas.extend(p)
            npoints += npoints2
            if verbose:
                print("Polygon: {0} points reduced to {1} points".format(
                    npoints1, npoints2))

    if verbose:
        print("Skipped {0} polygons < {1} points".format(nskip, min_points))
    if areas:
        if verbose:
            print(
                "Forming union of areas - total of {0} points in {1} polygons".
                format(npoints, len(areas)))
        areas = MultiPolygon(areas)
        areas = areas.buffer(0)
        try:
            if verbose:
                print("Writing wkt file {0}".format(extents_wktfile))
            from shapely.wkt import dumps
            with open(extents_wktfile, "w") as laf:
                laf.write(dumps(areas))
        except Exception:
            pass
Example #18
def mask_to_polygons(mask, epsilon=5, min_area=1.):
    """
    Pravi (multi)poligone od output slike mreze
    Input:
    - mask: mask image
    - epsilon: margin of error
    - min_area: minimal area for polygon

    Returns:
    - all_polygons: all polygons found in mask
    """
    # __author__ = Konstantin Lopuhin
    # https://www.kaggle.com/lopuhin/dstl-satellite-imagery-feature-detection/full-pipeline-demo-poly-pixels-ml-poly

    # first, find contours with cv2: it's much faster than shapely
    contours, hierarchy = cv2.findContours(
        ((mask == 1) * 255).astype(np.uint8),
        cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)
    # create approximate contours to have reasonable submission size
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True)
                       for cnt in contours]
    if not contours:
        return MultiPolygon()
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            poly = Polygon(
                shell=cnt[:, 0, :],
                holes=[c[:, 0, :] for c in cnt_children.get(idx, [])
                       if cv2.contourArea(c) >= min_area])
            all_polygons.append(poly)
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
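A usage sketch for the function above, assuming its cv2/numpy/shapely imports are in place; the synthetic binary mask is a filled square containing a hole:

import numpy as np

mask = np.zeros((30, 30), dtype=np.uint8)
mask[5:25, 5:25] = 1    # outer square
mask[12:18, 12:18] = 0  # hole
mp = mask_to_polygons(mask, epsilon=1, min_area=1.)
print(mp.is_valid, len(mp.geoms))  # one polygon with an interior ring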
Example #19
    def polygonize(self, mask):
        """Create polygons from binary pixel masks and output as a MultiPolygon. Uses
        OpenCV's ``findContours`` function to extract polygons and the Douglas-Peucker
        algorithm to simplify them.
        """
        mask[mask < 0.5] = 0
        mask[mask > 0] = 1

        # first, find contours with cv2: it's much faster than shapely
        image, contours, hierarchy = cv2.findContours(
            ((mask == 1) * 255).astype(np.uint8), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)
        # create approximate contours to have reasonable submission size
        approx_contours = [cv2.approxPolyDP(cnt, self.epsilon, True)
                           for cnt in contours]
        if not contours:
            return MultiPolygon()
        # now messy stuff to associate parent and child contours
        cnt_children = defaultdict(list)
        child_contours = set()
        assert hierarchy.shape[0] == 1
        # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
        for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
            if parent_idx != -1:
                child_contours.add(idx)
                cnt_children[parent_idx].append(approx_contours[idx])
        # create actual polygons filtering by area (removes artifacts)
        all_polygons = []
        for idx, cnt in enumerate(approx_contours):
            if idx not in child_contours and cv2.contourArea(cnt) >= self.min_area:
                assert cnt.shape[1] == 1
                try:
                    poly = Polygon(
                        shell=cnt[:, 0, :],
                        holes=[c[:, 0, :] for c in cnt_children.get(idx, [])
                               if cv2.contourArea(c) >= self.min_area])
                    all_polygons.append(poly)
                except Exception:
                    pass
        # approximating polygons might have created invalid ones, fix them
        all_polygons = MultiPolygon(all_polygons)
        if not all_polygons.is_valid:
            all_polygons = all_polygons.buffer(0)
            # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
            # need to keep it a Multi throughout
            if all_polygons.type == 'Polygon':
                all_polygons = MultiPolygon([all_polygons])
        return all_polygons
Example #20
    def mask2Polygons(mask):
        """
        将二值化图像转为多边形对象列表
        :param mask: ndarray 类型。二值化预测结果
        :return: list 类型。多边形对象列表
        """
        epsilon = 2
        # first, find contours with cv2: it's much faster than shapely
        image, contours, hierarchy = cv2.findContours(((mask == 1) * 255).astype(np.uint8), cv2.RETR_CCOMP,
                                                      cv2.CHAIN_APPROX_TC89_KCOS)
        # create approximate contours to have reasonable submission size
        approx_contours = [cv2.approxPolyDP(cnt, epsilon, True)
                           for cnt in contours]
        if not contours:
            return MultiPolygon()
        # now messy stuff to associate parent and child contours
        cnt_children = defaultdict(list)
        child_contours = set()
        assert hierarchy.shape[0] == 1
        # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
        for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
            if parent_idx != -1:
                child_contours.add(idx)
                cnt_children[parent_idx].append(approx_contours[idx])
        # create actual polygons filtering by area (removes artifacts)
        all_polygons = []
        for idx, cnt in enumerate(approx_contours):
            if idx not in child_contours and cv2.contourArea(cnt) >= 1.:
                assert cnt.shape[1] == 1
                poly = Polygon(
                    shell=cnt[:, 0, :],
                    holes=[c[:, 0, :] for c in cnt_children.get(idx, [])
                           if cv2.contourArea(c) >= 1.])
                all_polygons.append(poly)

        all_polygons = MultiPolygon(all_polygons)

        if not all_polygons.is_valid:
            all_polygons = all_polygons.buffer(0)

            if all_polygons.type == 'Polygon':
                all_polygons = MultiPolygon([all_polygons])

        return all_polygons
Example #21
def mask_to_polygons(mask, epsilon=5, min_area=1.):
    # __author__ = Konstantin Lopuhin
    # https://www.kaggle.com/lopuhin/dstl-satellite-imagery-feature-detection/full-pipeline-demo-poly-pixels-ml-poly

    # first, find contours with cv2: it's much faster than shapely
    threshold_mask = ((mask == 1) * 255).astype(np.uint8)

    # OpenCV 3 returned three values:
    # image, contours, hierarchy = cv2.findContours(threshold_mask, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)
    contours, hierarchy = cv2.findContours(threshold_mask, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)

    # create approximate contours to have reasonable submission size
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True)
                       for cnt in contours]
    if not contours:
        return MultiPolygon()
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            poly = Polygon(
                shell=cnt[:, 0, :],
                holes=[c[:, 0, :] for c in cnt_children.get(idx, [])
                       if cv2.contourArea(c) >= min_area])
            all_polygons.append(poly)
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
Example #22
    def load_coordinates(self):
        with open(self.file_name) as f:
            folder = parser.parse(f).getroot().Document.Folder

        # Need to check for multiple LineString elements
        _lines = []
        alts = []
        for pm in folder.Placemark:
            _line = []
            alt = None
            for points in pm.LineString.coordinates.text.split():
                lon, lat, alt = points.split(",")
                _line.append((float(lon), float(lat)))
            _lines.append(_line)
            alts.append(float(alt))
        ml = MultiLineString(_lines)
        self.region_data, regions = self.find_regions(ml, alts)
        mlp = MultiPolygon(regions)
        self.boundary = unary_union(mlp.buffer(0.001)).exterior.xy
Example #23
def polygon_coor(inpolygon):
    eps = 0.75  # width for dilating and eroding (buffer)
    dist = 0.3  # threshold distance
    # read the original shapefile
    df = gpd.read_file(inpolygon)
    # create new result shapefile
    col = ['geometry']
    res = gpd.GeoDataFrame(columns=col)
    df_explode = df.explode()
    dis = []
    for i, j in list(itertools.combinations(df_explode.index, 2)):
        distance = df_explode.geometry[i].distance(df_explode.geometry[j])
        # distance between polygons i and j in the shapefile
        dis.append(distance)
        if distance < dist:
            e = MultiPolygon([df_explode.geometry[i], df_explode.geometry[j]])
            fx = e.buffer(eps, 1, join_style=JOIN_STYLE.mitre).buffer(
                -eps, 1, join_style=JOIN_STYLE.mitre)
            res = res.append({'geometry': fx}, ignore_index=True)
    res_explode = res.explode()
    res_explode = gpd.GeoDataFrame(
        {'geometry': unary_union(res_explode["geometry"])})
    res_explode["area"] = res_explode['geometry'].area
    # Compute the 95th percentile of the area values as the major-polygon threshold
    include_area = np.percentile(res_explode["area"].to_numpy(), 95)
    res_explode1 = res_explode[res_explode["area"] > include_area]
    #Simplify the shape
    res_explode1 = res_explode1.simplify(0.05, preserve_topology=True)
    coordinates = []
    # Collect coordinates for the vertices
    for i in res_explode1:
        coordlist = list(zip(i.exterior.coords.xy[0], i.exterior.coords.xy[1]))
        #print (coordlist)
        for j in coordlist:
            coordinates.insert(0, round(j[1], 6))
            coordinates.insert(0, round(j[0], 6))
    # save the resulting shapefile to disk
    res_explode1.crs = df.crs
    res_explode1.to_file(
        os.path.join(os.path.dirname(inpolygon),
                     os.path.basename(inpolygon)[:-4] + "_simplified.shp"))
    #coordinates.reverse()
    return (','.join(map(str, coordinates)))
Example #24
def mask_to_polygons(mask, epsilon=1, min_area=1.):
    # __author__ = Konstantin Lopuhin
    # https://www.kaggle.com/lopuhin/dstl-satellite-imagery-feature-detection/full-pipeline-demo-poly-pixels-ml-poly

    # first, find contours with cv2: it's much faster than shapely
    image, contours, hierarchy = cv2.findContours(
        ((mask == 1) * 255).astype(np.uint8),
        cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_L1)

    # create approximate contours to have reasonable submission size
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True)
                       for cnt in contours]

    if not approx_contours:
        return MultiPolygon()
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            poly = Polygon(
                shell=cnt[:, 0, :],
                holes=[c[:, 0, :] for c in cnt_children.get(idx, [])
                       if cv2.contourArea(c) >= min_area])
            all_polygons.append(poly)
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # buffer() can turn a MultiPolygon into a plain Polygon; keep it a Multi
        if all_polygons.type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
Example #25
def mask2multipolygon(mask_data,
                      mask,
                      trans=(1.0, 0.0, 0.0, 0.0, 1.0, 0.0),
                      conn=4):
    geom_results = ({
        'properties': {
            'raster_val': v
        },
        'geometry': s
    } for i, (s, v) in enumerate(
        shapes(mask_data, mask=mask, connectivity=conn, transform=trans)))
    geometries = list(geom_results)

    multi = MultiPolygon(
        [shape(geometries[i]['geometry']) for i in range(len(geometries))])

    if not multi.is_valid:
        print('Not a valid polygon, using its buffer!')
        multi = multi.buffer(0)

    return multi
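An illustrative call, assuming numpy plus the rasterio.features.shapes and shapely.geometry.shape imports the function above expects; the default trans tuple corresponds to an identity affine transform:

import numpy as np

mask_data = np.zeros((8, 8), dtype=np.int16)
mask_data[2:6, 2:6] = 1
multi = mask2multipolygon(mask_data, mask_data == 1)
print(multi.area)  # 16.0: one 4x4 square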
Example #26
def polygonize(mask, epsilon=1., min_area=10.):
    # https://www.programcreek.com/python/example/70440/cv2.findContours
    contours, hierarchy = cv2.findContours(mask, cv2.RETR_CCOMP,
                                           cv2.CHAIN_APPROX_TC89_KCOS)
    # create approximate contours to have reasonable submission size
    approx_contours = [
        cv2.approxPolyDP(cnt, epsilon, True) for cnt in contours
    ]
    if not contours:
        return MultiPolygon()
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            poly = Polygon(shell=cnt[:, 0, :],
                           holes=[
                               c[:, 0, :] for c in cnt_children.get(idx, [])
                               if cv2.contourArea(c) >= min_area
                           ])
            all_polygons.append(poly)
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
Example #27
def mask2multipolygon(mask_data, mask, transform=IDENTITY, connectivity=4):
    """Convert from binary mask to shapely multipolygon."""
    geom_results = ({
        'properties': {
            'raster_val': v
        },
        'geometry': s
    } for i, (s, v) in enumerate(
        shapes(mask_data,
               mask=mask,
               connectivity=connectivity,
               transform=transform)))
    geometries = list(geom_results)

    multi = MultiPolygon(
        [shape(geometries[i]['geometry']) for i in range(len(geometries))])

    if not multi.is_valid:
        print('Not a valid polygon, using its buffer!')
        multi = multi.buffer(0)

    return multi
Example #28
def evolve_agat(first_layer,
                fig=None,
                N_apexes=-1,
                layer_width=0.05,
                min_area=0.001):
    if (N_apexes == -1): N_apexes = len(first_layer)
    if fig is None:
        fig = plt.figure(1, figsize=(5, 5), dpi=90)
    if len(fig.get_axes()) == 0:
        ax = fig.add_subplot(111)
        ax.set_aspect(1)
        ax.set_title('Agate')
        ax.set_facecolor('black')
    else:
        ax = fig.get_axes()[0]
    layer = MultiPolygon([Polygon(first_layer)])
    while (layer.area > min_area):
        for polygon in layer.geoms:
            x, y = polygon.exterior.xy
            ax.plot(x, y, choice(colours, 1)[0])
            ax.fill(x, y, choice(colours, 1)[0])
        layer = layer.buffer(-layer_width, N_apexes)
        # a fully eroded layer comes back empty; stop before wrapping it
        if layer.is_empty:
            break
        if layer.geom_type == 'Polygon':
            layer = MultiPolygon([layer])
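An illustrative call, assuming `colours` is defined at module level as a sequence of matplotlib colour names and `choice` is numpy's random choice, as the function above uses:

import matplotlib.pyplot as plt

square = [(0, 0), (1, 0), (1, 1), (0, 1)]
evolve_agat(square, layer_width=0.05)
plt.show()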
Example #29
def zonal_stats(vectors,
                raster,
                layer_num=0,
                band_num=1,
                func=None,
                nodata_value=None,
                categorical=False,
                stats=None,
                copy_properties=False,
                all_touched=False,
                transform=None):

    if not stats:
        if not categorical:
            stats = ['count', 'min', 'max', 'mean', 'std']
            if func:
                stats.append('func')
        else:
            stats = []

    # must have transform arg
    if not transform:
        raise Exception("Must provide the 'transform' kwarg")
    rgt = transform
    rsize = (raster.shape[1], raster.shape[0])

    rbounds = raster_extent_as_bounds(rgt, rsize)
    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)
    global_src_offset = (0, 0, raster.shape[0], raster.shape[1])
    global_src_array = raster

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    results = []
    entity_images = []

    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            geom = shape(feat['geometry'])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon(
                [box(*(pt.buffer(buff).bounds)) for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        if geom_bounds[0] < rbounds[0]:
            geom_bounds[0] = rbounds[0]
        if geom_bounds[1] < rbounds[1]:
            geom_bounds[1] = rbounds[1]
        if geom_bounds[2] > rbounds[2]:
            geom_bounds[2] = rbounds[2]
        if geom_bounds[3] > rbounds[3]:
            geom_bounds[3] = rbounds[3]

        # calculate new geotransform of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)

        new_gt = ((rgt[0] + (src_offset[0] * rgt[1])), rgt[1], 0.0,
                  (rgt[3] + (src_offset[1] * rgt[5])), 0.0, rgt[5])

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
            img = {'__fid__': i, 'img': None}
        else:
            # derive array from global source extent array
            # useful *only* when disk IO or raster format inefficiencies
            # are your limiting factor
            # advantage: reads raster data in one pass before loop
            # disadvantage: large vector extents combined with big rasters
            # need lotsa memory
            xa = src_offset[0] - global_src_offset[0]
            ya = src_offset[1] - global_src_offset[1]
            xb = xa + src_offset[2]
            yb = ya + src_offset[3]
            src_array = global_src_array[ya:yb, xa:xb]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create('rvds', src_offset[2], src_offset[3], 1,
                                 gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)

            if all_touched:
                gdal.RasterizeLayer(rvds, [1],
                                    mem_layer,
                                    None,
                                    None,
                                    burn_values=[1],
                                    options=['ALL_TOUCHED=True'])
            else:
                gdal.RasterizeLayer(rvds, [1],
                                    mem_layer,
                                    None,
                                    None,
                                    burn_values=[1],
                                    options=['ALL_TOUCHED=False'])
            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 to get the correct mask effect
            # we also mask out nodata values explictly
            masked = np.ma.MaskedArray(src_array,
                                       mask=np.logical_or(
                                           src_array == nodata_value,
                                           np.logical_not(rv_array)))

            feature_stats = {}

            if 'min' in stats:
                feature_stats['min'] = float(masked.min())
            if 'max' in stats:
                feature_stats['max'] = float(masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = float(masked.mean())
            if 'count' in stats:
                feature_stats['count'] = int(masked.count())
            if 'std' in stats:
                feature_stats['std'] = float(masked.std())
            # optional
            if 'func' in stats:
                feature_stats[func.__name__] = func(masked)
            if 'sum' in stats:
                feature_stats['sum'] = float(masked.sum())
            if 'median' in stats:
                feature_stats['median'] = float(np.median(masked.compressed()))
            if 'range' in stats:
                try:
                    rmin = feature_stats['min']
                except KeyError:
                    rmin = float(masked.min())
                try:
                    rmax = feature_stats['max']
                except KeyError:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin
            img = {'__fid__': i, 'img': masked}

        # Use the enumerated id as __fid__
        feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in list(feat['properties'].items()):
                feature_stats[key] = val

        results.append(feature_stats)
        entity_images.append(img)
    return results, entity_images
Example #30
def raster_stats(
    vectors,
    raster,
    layer_num=0,
    band_num=1,
    nodata_value=None,
    exclude_ranges=None,
    global_src_extent=False,
    categorical=False,
    stats=None,
    copy_properties=False,
):

    if not stats:
        if not categorical:
            stats = DEFAULT_STATS
        else:
            stats = []
    else:
        if isinstance(stats, basestring):
            if stats in ["*", "ALL"]:
                stats = VALID_STATS
            else:
                stats = stats.split()
    for x in stats:
        if x not in VALID_STATS:
            raise RasterStatsError("Stat `%s` not valid;" " must be one of \n %r" % (x, VALID_STATS))

    # print "helloRezaTest"
    run_count = False
    if categorical or "majority" in stats or "minority" in stats or "unique" in stats:
        # run the counter once, only if needed
        run_count = True

    rds = gdal.Open(raster, GA_ReadOnly)
    if not rds:
        raise RasterStatsError("Cannot open %r as GDAL raster" % raster)
    rb = rds.GetRasterBand(band_num)
    rgt = rds.GetGeoTransform()
    rsize = (rds.RasterXSize, rds.RasterYSize)
    rbounds = raster_extent_as_bounds(rgt, rsize)

    if nodata_value is not None:
        nodata_value = float(nodata_value)
        rb.SetNoDataValue(nodata_value)
    else:
        nodata_value = rb.GetNoDataValue()

    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)

    if global_src_extent:
        # create an in-memory numpy array of the source raster data
        # covering the whole extent of the vector layer
        if strategy != "ogr":
            raise RasterStatsError("global_src_extent requires OGR vector")

        # find extent of ALL features
        ds = ogr.Open(vectors)
        layer = ds.GetLayer(layer_num)
        ex = layer.GetExtent()
        # transform from OGR extent to xmin, ymin, xmax, ymax
        layer_extent = (ex[0], ex[2], ex[1], ex[3])

        global_src_offset = bbox_to_pixel_offsets(rgt, layer_extent)
        global_src_array = rb.ReadAsArray(*global_src_offset)

    mem_drv = ogr.GetDriverByName("Memory")
    driver = gdal.GetDriverByName("MEM")

    results = []

    for i, feat in enumerate(features_iter):
        if feat["type"] == "Feature":
            geom = shape(feat["geometry"])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(buff).bounds)) for pt in geom.geoms])
        elif geom.type == "Point":
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        if geom_bounds[0] < rbounds[0]:
            geom_bounds[0] = rbounds[0]
        if geom_bounds[1] < rbounds[1]:
            geom_bounds[1] = rbounds[1]
        if geom_bounds[2] > rbounds[2]:
            geom_bounds[2] = rbounds[2]
        if geom_bounds[3] > rbounds[3]:
            geom_bounds[3] = rbounds[3]

        # calculate new geotransform of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)

        new_gt = ((rgt[0] + (src_offset[0] * rgt[1])), rgt[1], 0.0, (rgt[3] + (src_offset[1] * rgt[5])), 0.0, rgt[5])

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
        else:
            if not global_src_extent:
                # use feature's source extent and read directly from source
                # fastest option when you have fast disks and well-indexed raster
                # advantage: each feature uses the smallest raster chunk
                # disadvantage: lots of disk reads on the source raster
                src_array = rb.ReadAsArray(*src_offset)
            else:
                # derive array from global source extent array
                # useful *only* when disk IO or raster format inefficiencies are your limiting factor
                # advantage: reads raster data in one pass before loop
                # disadvantage: large vector extents combined with big rasters need lotsa memory
                xa = src_offset[0] - global_src_offset[0]
                ya = src_offset[1] - global_src_offset[1]
                xb = xa + src_offset[2]
                yb = ya + src_offset[3]
                src_array = global_src_array[ya:yb, xa:xb]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource("out")
            mem_layer = mem_ds.CreateLayer("out", spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create("rvds", src_offset[2], src_offset[3], 1, gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)

            gdal.RasterizeLayer(rvds, [1], mem_layer, None, None, burn_values=[1])
            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature:
            # start with an all-False mask, flag cells outside the rasterized
            # geometry (logical_not flips 0<->1), then flag nodata cells and
            # any user-requested exclusion ranges explicitly.
            places_to_mask = np.zeros(src_array.shape, dtype=bool)
            places_to_mask = np.logical_or(np.logical_not(rv_array), places_to_mask)
            if nodata_value is not None:
                places_to_mask = np.logical_or(src_array == nodata_value, places_to_mask)

            if exclude_ranges is not None:
                # exclude_ranges is a space-delimited list of "min,max" pairs,
                # e.g. "1,50 60,100"; mask every cell falling inside any pair
                for rng in exclude_ranges.split(" "):
                    nodata_values = rng.split(",")
                    nodata_value_min = int(nodata_values[0])
                    nodata_value_max = int(nodata_values[1])
                    places_to_mask = np.logical_or(
                        np.logical_and(src_array >= nodata_value_min, src_array <= nodata_value_max), places_to_mask
                    )

            masked = np.ma.masked_where(places_to_mask, src_array)

            if run_count:
                pixel_count = Counter(masked.compressed())

            if categorical:
                feature_stats = dict(pixel_count)
            else:
                feature_stats = {}

            if "min" in stats:
                feature_stats["min"] = float(masked.min())
            if "max" in stats:
                feature_stats["max"] = float(masked.max())
            if "mean" in stats:
                feature_stats["mean"] = float(masked.mean())
            if "count" in stats:
                feature_stats["count"] = int(masked.count())
            # optional
            if "sum" in stats:
                feature_stats["sum"] = float(masked.sum())
            if "std" in stats:
                feature_stats["std"] = float(masked.std())
            if "median" in stats:
                feature_stats["median"] = float(np.median(masked.compressed()))
            if "majority" in stats:
                try:
                    feature_stats["majority"] = pixel_count.most_common(1)[0][0]
                except IndexError:
                    feature_stats["majority"] = None
            if "minority" in stats:
                try:
                    feature_stats["minority"] = pixel_count.most_common()[-1][0]
                except IndexError:
                    feature_stats["minority"] = None
            if "unique" in stats:
                feature_stats["unique"] = len(pixel_count.keys())
            if "range" in stats:
                try:
                    rmin = feature_stats["min"]
                except KeyError:
                    rmin = float(masked.min())
                try:
                    rmax = feature_stats["max"]
                except KeyError:
                    rmax = float(masked.max())
                feature_stats["range"] = rmax - rmin

        try:
            # Use the provided feature id as __fid__
            feature_stats["__fid__"] = feat["id"]
        except KeyError:
            # use the enumerator
            feature_stats["__fid__"] = i

        if feat.has_key("properties") and copy_properties:
            for key, val in feat["properties"].items():
                feature_stats[key] = val

        results.append(feature_stats)

    return results
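
A hedged usage sketch for this exclude_ranges variant; the vector and raster paths are placeholders, and the range string follows the space-delimited "min,max" format parsed above:

# Hypothetical call; 'fields.shp' and 'yield.tif' are placeholder paths.
rows = raster_stats(
    'fields.shp',
    'yield.tif',
    stats='count mean range',
    exclude_ranges='1,50 60,100',  # treat values 1-50 and 60-100 as masked
    copy_properties=True,
)
for row in rows:
    print(row['__fid__'], row['mean'])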
Exemplo n.º 31
0
def raster_stats(vectors,
                 raster,
                 layer_num=0,
                 band_num=1,
                 nodata_value=None,
                 global_src_extent=False,
                 categorical=False,
                 stats=None,
                 copy_properties=False):

    if not stats:
        if not categorical:
            stats = DEFAULT_STATS
        else:
            stats = []
    else:
        if isinstance(stats, str):
            if stats in ['*', 'ALL']:
                stats = VALID_STATS
            else:
                stats = stats.split()
    for x in stats:
        if x not in VALID_STATS:
            raise RasterStatsError("Stat `%s` not valid;" \
                " must be one of \n %r" % (x, VALID_STATS))

    run_count = False
    if categorical or 'majority' in stats or 'minority' in stats or 'unique' in stats or 'all' in stats:
        # run the counter once, only if needed
        run_count = True

    rds = gdal.Open(raster, GA_ReadOnly)
    if not rds:
        raise RasterStatsError("Cannot open %r as GDAL raster" % raster)
    rb = rds.GetRasterBand(band_num)
    rgt = rds.GetGeoTransform()
    rsize = (rds.RasterXSize, rds.RasterYSize)
    rbounds = raster_extent_as_bounds(rgt, rsize)

    if nodata_value is not None:
        nodata_value = float(nodata_value)
        rb.SetNoDataValue(nodata_value)
    else:
        nodata_value = rb.GetNoDataValue()

    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)

    if global_src_extent:
        # create an in-memory numpy array of the source raster data
        # covering the whole extent of the vector layer
        if strategy != "ogr":
            raise RasterStatsError("global_src_extent requires OGR vector")

        # find extent of ALL features
        ds = ogr.Open(vectors)
        layer = ds.GetLayer(layer_num)
        ex = layer.GetExtent()
        # transform from OGR extent to xmin, ymin, xmax, ymax
        layer_extent = (ex[0], ex[2], ex[1], ex[3])

        global_src_offset = bbox_to_pixel_offsets(rgt, layer_extent)
        global_src_array = rb.ReadAsArray(*global_src_offset)

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    results = []

    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            try:
                geom = shape(feat['geometry'])
            except Exception:
                # skip features whose geometry cannot be parsed
                continue
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon(
                [box(*(pt.buffer(buff).bounds)) for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        if geom_bounds[0] < rbounds[0]:
            geom_bounds[0] = rbounds[0]
        if geom_bounds[1] < rbounds[1]:
            geom_bounds[1] = rbounds[1]
        if geom_bounds[2] > rbounds[2]:
            geom_bounds[2] = rbounds[2]
        if geom_bounds[3] > rbounds[3]:
            geom_bounds[3] = rbounds[3]

        # calculate new geotransform of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)

        new_gt = ((rgt[0] + (src_offset[0] * rgt[1])), rgt[1], 0.0,
                  (rgt[3] + (src_offset[1] * rgt[5])), 0.0, rgt[5])

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all,
            # so there's no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
        else:
            if not global_src_extent:
                # use feature's source extent and read directly from source
                # fastest option when you have fast disks and well-indexed raster
                # advantage: each feature uses the smallest raster chunk
                # disadvantage: lots of disk reads on the source raster
                src_array = rb.ReadAsArray(*src_offset)

                if src_array is None:
                    # ReadAsArray returns None when the window overshoots the
                    # raster edge; shrink the window by one row and retry
                    src_offset = (src_offset[0], src_offset[1], src_offset[2],
                                  src_offset[3] - 1)
                    src_array = rb.ReadAsArray(*src_offset)

            else:
                # derive array from global source extent array
                # useful *only* when disk IO or raster format inefficiencies are your limiting factor
                # advantage: reads raster data in one pass before loop
                # disadvantage: large vector extents combined with big rasters need lots of memory
                xa = src_offset[0] - global_src_offset[0]
                ya = src_offset[1] - global_src_offset[1]
                xb = xa + src_offset[2]
                yb = ya + src_offset[3]
                src_array = global_src_array[ya:yb, xa:xb]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create('rvds', src_offset[2], src_offset[3], 1,
                                 gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)

            gdal.RasterizeLayer(rvds, [1],
                                mem_layer,
                                None,
                                None,
                                burn_values=[1])
            rv_array = rvds.ReadAsArray()
            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 to get the correct mask effect
            # we also mask out nodata values explicitly
            # NOTE: src_array may be None here if the raster read failed.

            test_ok = True
            if src_array is None:
                test_ok = False
            else:
                masked = np.ma.MaskedArray(src_array,
                                           mask=np.logical_or(
                                               src_array == nodata_value,
                                               np.logical_not(rv_array)))

            if run_count:
                if test_ok:
                    pixel_count = Counter(masked.compressed())
                else:
                    pixel_count = Counter()
            if categorical:
                feature_stats = dict(pixel_count)
            else:
                feature_stats = {}

            if 'min' in stats:
                if test_ok and masked.min().any():
                    try:
                        feature_stats['min'] = float(masked.min())
                    except:
                        feature_stats['min'] = 0.0
                else:
                    feature_stats['min'] = 0.0
            if 'max' in stats:
                if test_ok and masked.max().any():
                    try:
                        feature_stats['max'] = float(masked.max())
                    except:
                        feature_stats['max'] = 0.0
                else:
                    feature_stats['max'] = 0.0
            if 'mean' in stats:
                if test_ok and masked.mean().any():
                    try:
                        feature_stats['mean'] = float(masked.mean())
                    except:
                        feature_stats['mean'] = 0.0
                else:
                    feature_stats['mean'] = 0.0
            if 'count' in stats:
                if test_ok and masked.count().any():
                    try:
                        feature_stats['count'] = int(masked.count())
                    except:
                        feature_stats['count'] = 0
                else:
                    feature_stats['count'] = 0
            # optional
            if 'sum' in stats:
                if test_ok and masked.sum().any():
                    try:
                        feature_stats['sum'] = float(masked.sum())
                    except:
                        feature_stats['sum'] = 0.0
                else:
                    feature_stats['sum'] = 0.0
            if 'std' in stats:
                if test_ok and masked.std().any():
                    try:
                        feature_stats['std'] = float(masked.std())
                    except:
                        feature_stats['std'] = 0.0
                else:
                    feature_stats['std'] = 0.0
            if 'median' in stats:
                if test_ok and masked.compressed().any():
                    try:
                        feature_stats['median'] = float(
                            np.median(masked.compressed()))
                    except:
                        feature_stats['median'] = 0.0
                else:
                    feature_stats['median'] = 0.0

            # 'all' option added by GFT on 2014-03-17
            if 'all' in stats:
                try:
                    feature_stats['all'] = pixel_count.most_common()
                except IndexError:
                    feature_stats['all'] = None

            if 'majority' in stats:
                try:
                    feature_stats['majority'] = pixel_count.most_common(
                        1)[0][0]
                except IndexError:
                    feature_stats['majority'] = None

            if 'minority' in stats:
                try:
                    feature_stats['minority'] = pixel_count.most_common(
                    )[-1][0]
                except IndexError:
                    feature_stats['minority'] = None

            if 'unique' in stats:
                if test_ok:
                    feature_stats['unique'] = len(pixel_count.keys())
                else:
                    feature_stats['unique'] = 0

            if 'range' in stats:
                try:
                    rmin = feature_stats['min']
                except KeyError:
                    if test_ok and masked.min().any():
                        try:
                            rmin = float(masked.min())
                        except:
                            rmin = 0.0
                    else:
                        rmin = 0.0
                try:
                    rmax = feature_stats['max']
                except KeyError:
                    if test_ok and masked.max().any():
                        try:
                            rmax = float(masked.max())
                        except:
                            rmax = 0.0
                    else:
                        rmax = 0.0
                feature_stats['range'] = rmax - rmin

        try:
            # Use the provided feature id as __fid__
            feature_stats['__fid__'] = feat['id']
        except KeyError:
            # use the enumerator
            feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in feat['properties'].items():
                feature_stats[key] = val

        results.append(feature_stats)

    return results
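
A hedged usage sketch for this defensive variant, exercising the 'all' stat it adds (this assumes the module's VALID_STATS list includes 'all'); paths are placeholders:

# Hypothetical call; 'parcels.shp' and 'landcover.tif' are placeholder paths.
rows = raster_stats('parcels.shp', 'landcover.tif',
                    stats='all majority unique',
                    categorical=True)
for row in rows:
    # 'all' carries the full (value, count) list from Counter.most_common()
    print(row['__fid__'], row.get('majority'), row.get('all'))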
Exemplo n.º 32
0
def zonal_stats(vectors, raster, layer_num=0, band_num=1, nodata_value=None,
                global_src_extent=False, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None,
                add_stats=None, raster_out=False):
    """Summary statistics of a raster, broken out by vector geometries.

    Attributes
    ----------
    vectors : path to an OGR vector source or list of geo_interface or WKT str
    raster : ndarray or path to a GDAL raster source
        If ndarray is passed, the `transform` kwarg is required.
    layer_num : int, optional
        If `vectors` is a path to an OGR source, the vector layer to use
        (counting from 0).
        defaults to 0.
    band_num : int, optional
        If `raster` is a GDAL source, the band number to use (counting from 1).
        defaults to 1.
    nodata_value : float, optional
        If `raster` is a GDAL source, this value overrides any NODATA value
        specified in the file's metadata.
        If `None`, the file's metadata's NODATA value (if any) will be used.
        `ndarray`s don't support `nodata_value`.
        defaults to `None`.
    global_src_extent : bool, optional
        Pre-allocate entire raster before iterating over vector features.
        Use `True` if limited by disk IO or indexing into raster;
            requires sufficient RAM to store array in memory
        Use `False` with fast disks and a well-indexed raster, or when
        memory-constrained.
        Ignored when `raster` is an ndarray,
            because it is already completely in memory.
        defaults to `False`.
    categorical : bool, optional
    stats : list of str, or space-delimited str, optional
        Which statistics to calculate for each zone.
        All possible choices are listed in `VALID_STATS`.
        defaults to `DEFAULT_STATS`, a subset of these.
    copy_properties : bool, optional
        Include feature properties alongside the returned stats.
        defaults to `False`
    all_touched : bool, optional
        Whether to include every raster cell touched by a geometry, or only
        those having a center point within the polygon.
        defaults to `False`
    transform : list of float, optional
        GDAL-style geotransform coordinates when `raster` is an ndarray.
        Required when `raster` is an ndarray, otherwise ignored.
    add_stats : Dictionary with names and functions of additional statistics to
                compute, optional
    raster_out : Include the masked numpy array for each feature, optional
        Each feature dictionary will have the following additional keys:
            clipped raster (`mini_raster`)
            Geo-transform (`mini_raster_GT`)
            No Data Value (`mini_raster_NDV`)

    Returns
    -------
    list of dicts
        Each dict represents one vector geometry.
        Its keys include `__fid__` (the geometry feature id)
        and each of the `stats` requested.
    """

    if not stats:
        if not categorical:
            stats = DEFAULT_STATS
        else:
            stats = []
    else:
        if isinstance(stats, str):
            if stats in ['*', 'ALL']:
                stats = VALID_STATS
            else:
                stats = stats.split()
    for x in stats:
        if x.startswith("percentile_"):
            try:
                get_percentile(x)
            except ValueError:
                raise RasterStatsError(
                    "Stat `%s` is not valid; must use"
                    " `percentile_` followed by a float between 0 and 100" % x)
        elif x not in VALID_STATS:
            raise RasterStatsError(
                "Stat `%s` not valid; "
                "must be one of \n %r" % (x, VALID_STATS))

    run_count = False
    if categorical or 'majority' in stats or 'minority' in stats or \
       'unique' in stats:
        # run the counter once, only if needed
        run_count = True

    if isinstance(raster, np.ndarray):
        raster_type = 'ndarray'

        # must have transform arg
        if not transform:
            raise RasterStatsError("Must provide the 'transform' kwarg when "
                                   "using ndarrays as src raster")
        rgt = transform
        rsize = (raster.shape[1], raster.shape[0])

        # global_src_extent is implicitly turned on, array is already in memory
        if not global_src_extent:
            global_src_extent = True

        if nodata_value:
            raise NotImplementedError("ndarrays don't support 'nodata_value'")

    else:
        raster_type = 'gdal'
        rds = gdal.Open(raster, GA_ReadOnly)
        if not rds:
            raise RasterStatsError("Cannot open %r as GDAL raster" % raster)
        rb = rds.GetRasterBand(band_num)
        rgt = rds.GetGeoTransform()
        rsize = (rds.RasterXSize, rds.RasterYSize)

        if nodata_value is not None:
            nodata_value = float(nodata_value)
            rb.SetNoDataValue(nodata_value)
        else:
            nodata_value = rb.GetNoDataValue()

    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)

    if global_src_extent and raster_type == 'gdal':
        # create an in-memory numpy array of the source raster data
        # covering the whole extent of the vector layer
        if strategy != "ogr":
            raise RasterStatsError("global_src_extent requires OGR vector")

        # find extent of ALL features
        ds = ogr.Open(vectors)
        layer = ds.GetLayer(layer_num)
        ex = layer.GetExtent()
        # transform from OGR extent to xmin, ymin, xmax, ymax
        layer_extent = (ex[0], ex[2], ex[1], ex[3])

        global_src_offset = bbox_to_pixel_offsets(rgt, layer_extent, rsize)
        global_src_array = rb.ReadAsArray(*global_src_offset)
    elif global_src_extent and raster_type == 'ndarray':
        global_src_offset = (0, 0, raster.shape[0], raster.shape[1])
        global_src_array = raster

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    results = []

    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            geom = shape(feat['geometry'])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(buff).bounds))
                                for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        geom_bounds = list(geom.bounds)

        # calculate new pixel coordinates of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds, rsize)

        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
        else:
            if not global_src_extent:
                # use feature's source extent and read directly from source
                # fastest option when you have fast disks and fast raster
                # advantage: each feature uses the smallest raster chunk
                # disadvantage: lots of disk reads on the source raster
                src_array = rb.ReadAsArray(*src_offset)
            else:
                # derive array from global source extent array
                # useful *only* when disk IO or raster format inefficiencies
                # are your limiting factor
                # advantage: reads raster data in one pass before loop
                # disadvantage: large vector extents combined with big rasters
                #               require lotsa memory
                xa = src_offset[0] - global_src_offset[0]
                ya = src_offset[1] - global_src_offset[1]
                xb = xa + src_offset[2]
                yb = ya + src_offset[3]
                src_array = global_src_array[ya:yb, xa:xb]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create('rvds', src_offset[2], src_offset[3], 1, gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)

            if all_touched:
                gdal.RasterizeLayer(rvds, [1], mem_layer, None, None,
                                    burn_values=[1],
                                    options=['ALL_TOUCHED=True'])
            else:
                gdal.RasterizeLayer(rvds, [1], mem_layer, None, None,
                                    burn_values=[1],
                                    options=['ALL_TOUCHED=False'])

            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 for the correct mask effect
            # we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(
                    src_array == nodata_value,
                    np.logical_not(rv_array)
                )
            )

            if run_count:
                pixel_count = Counter(masked.compressed())

            if categorical:
                feature_stats = dict(pixel_count)
            else:
                feature_stats = {}

            if 'min' in stats:
                feature_stats['min'] = float(masked.min())
            if 'max' in stats:
                feature_stats['max'] = float(masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = float(masked.mean())
            if 'count' in stats:
                feature_stats['count'] = int(masked.count())
            # optional
            if 'sum' in stats:
                feature_stats['sum'] = float(masked.sum())
            if 'std' in stats:
                feature_stats['std'] = float(masked.std())
            if 'median' in stats:
                feature_stats['median'] = float(np.median(masked.compressed()))
            if 'majority' in stats:
                try:
                    feature_stats['majority'] = pixel_count.most_common(1)[0][0]
                except IndexError:
                    feature_stats['majority'] = None
            if 'minority' in stats:
                try:
                    feature_stats['minority'] = pixel_count.most_common()[-1][0]
                except IndexError:
                    feature_stats['minority'] = None
            if 'unique' in stats:
                feature_stats['unique'] = len(list(pixel_count.keys()))
            if 'range' in stats:
                try:
                    rmin = feature_stats['min']
                except KeyError:
                    rmin = float(masked.min())
                try:
                    rmax = feature_stats['max']
                except KeyError:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin

            for pctile in [s for s in stats if s.startswith('percentile_')]:
                q = get_percentile(pctile)
                pctarr = masked.compressed()
                if pctarr.size == 0:
                    feature_stats[pctile] = None
                else:
                    feature_stats[pctile] = np.percentile(pctarr, q)

            if add_stats is not None:
                for stat_name, stat_func in add_stats.items():
                    feature_stats[stat_name] = stat_func(masked)
            if raster_out:
                masked.fill_value = nodata_value
                masked.data[masked.mask] = nodata_value
                feature_stats['mini_raster'] = masked
                feature_stats['mini_raster_GT'] = new_gt
                feature_stats['mini_raster_NDV'] = nodata_value

        # Use the enumerated id as __fid__
        feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in list(feat['properties'].items()):
                feature_stats[key] = val

        results.append(feature_stats)

    return results
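
Since the docstring above describes `add_stats` and `percentile_` stats only briefly, here is a hedged usage sketch; the paths and the helper function are illustrative, not part of the library:

import numpy as np

def interquartile_range(masked):
    # custom stat: receives the per-feature masked array
    arr = masked.compressed()
    if arr.size == 0:
        return None
    return float(np.percentile(arr, 75) - np.percentile(arr, 25))

# Hypothetical call; 'counties.shp' and 'temp.tif' are placeholder paths.
rows = zonal_stats('counties.shp', 'temp.tif',
                   stats='min max mean percentile_90',
                   add_stats={'iqr': interquartile_range},
                   all_touched=True)
print(rows[0]['percentile_90'], rows[0]['iqr'])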
Exemplo n.º 33
0
    def _process_element(self, element):
        if not bool(element):
            return element.clone(crs=self.p.projection)

        crs = element.crs
        proj = self.p.projection
        if (isinstance(crs, ccrs.PlateCarree) and not isinstance(proj, ccrs.PlateCarree)
            and crs.proj4_params['lon_0'] != 0):
            element = self.instance(projection=ccrs.PlateCarree())(element)

        if isinstance(proj, ccrs.CRS) and not isinstance(proj, ccrs.Projection):
            raise ValueError('Invalid transform: spherical contouring is not '
                             'supported; consider using PlateCarree/RotatedPole.')

        if isinstance(element, Polygons):
            geoms = polygons_to_geom_dicts(element, skip_invalid=False)
        else:
            geoms = path_to_geom_dicts(element, skip_invalid=False)

        projected = []
        for path in geoms:
            geom = path['geometry']

            # Ensure minimum area for polygons (precision issues cause errors)
            if isinstance(geom, Polygon) and geom.area < 1e-15:
                continue
            elif isinstance(geom, MultiPolygon):
                polys = [g for g in geom.geoms if g.area > 1e-15]
                if not polys:
                    continue
                geom = MultiPolygon(polys)
            elif (not geom or isinstance(geom, GeometryCollection)):
                continue

            proj_geom = proj.project_geometry(geom, element.crs)

            # Attempt to fix geometry without being noisy about it
            logger = logging.getLogger()
            try:
                prev = logger.level
                logger.setLevel(logging.ERROR)
                if not proj_geom.is_valid:
                    proj_geom = proj.project_geometry(geom.buffer(0), element.crs)
            except:
                continue
            finally:
                logger.setLevel(prev)
            if proj_geom.geom_type == 'GeometryCollection' and proj_geom.is_empty:
                continue
            data = dict(path, geometry=proj_geom)
            if 'holes' in data:
                data.pop('holes')
            projected.append(data)

        if len(geoms) and len(projected) == 0:
            self.warning('While projecting a %s element from a %s coordinate '
                         'reference system (crs) to a %s projection none of '
                         'the projected paths were contained within the bounds '
                         'specified by the projection. Ensure you have specified '
                         'the correct coordinate system for your data.' %
                         (type(element).__name__, type(element.crs).__name__,
                          type(self.p.projection).__name__))

        # Try casting back to original types
        if element.interface is GeoPandasInterface:
            import geopandas as gpd
            projected = gpd.GeoDataFrame(projected, columns=element.data.columns)
        elif element.interface is MultiInterface:
            x, y = element.kdims
            item = element.data[0] if element.data else None
            if item is None or (isinstance(item, dict) and 'geometry' in item):
                return element.clone(projected, crs=self.p.projection)
            projected = [geom_dict_to_array_dict(p, [x.name, y.name]) for p in projected]
            if any('holes' in p for p in projected):
                pass
            elif pd and isinstance(item, pd.DataFrame):
                projected = [pd.DataFrame(p, columns=item.columns) for p in projected]
            elif isinstance(item, np.ndarray):
                projected = [np.column_stack([p[d.name] for d in element.dimensions()])
                             for p in projected]
        return element.clone(projected, crs=self.p.projection)
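
The buffer(0) repair used in _process_element is a common shapely idiom worth seeing in isolation; a minimal, self-contained illustration:

from shapely.geometry import Polygon

# A self-intersecting "bowtie" ring is invalid; buffering by zero rebuilds
# it as a valid geometry (a Polygon or MultiPolygon, depending on the GEOS
# version in use).
bowtie = Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])
print(bowtie.is_valid)   # False
fixed = bowtie.buffer(0)
print(fixed.is_valid)    # True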
Exemplo n.º 34
0
def zonal_stats(vectors, raster, layer_num=0, band_num=1, nodata_value=None, 
                 global_src_extent=False, categorical=False, stats=None, 
                 copy_properties=False, all_touched=False, transform=None):

    if not stats:
        if not categorical:
            stats = DEFAULT_STATS
        else:
            stats = []
    else:
        if isinstance(stats, str):
            if stats in ['*', 'ALL']:
                stats = VALID_STATS
            else:
                stats = stats.split()
    for x in stats:
        if x not in VALID_STATS:
            raise RasterStatsError("Stat `%s` not valid;" \
                " must be one of \n %r" % (x, VALID_STATS))

    run_count = False
    if categorical or 'majority' in stats or 'minority' in stats or \
       'unique' in stats:
        # run the counter once, only if needed
        run_count = True

    if isinstance(raster, np.ndarray):
        raster_type = 'ndarray'

        # must have transform arg
        if not transform:
            raise RasterStatsError("Must provide the 'transform' kwarg when "\
                "using ndarrays as src raster")
        rgt = transform
        rsize = (raster.shape[1], raster.shape[0])

        # global_src_extent is implicitly turned on, array is already in memory
        if not global_src_extent:
            global_src_extent = True

        if nodata_value:
            raise NotImplementedError("ndarrays don't support 'nodata_value'")

    else:
        raster_type = 'gdal'
        rds = gdal.Open(raster, GA_ReadOnly)
        if not rds:
            raise RasterStatsError("Cannot open %r as GDAL raster" % raster)
        rb = rds.GetRasterBand(band_num)
        rgt = rds.GetGeoTransform()
        rsize = (rds.RasterXSize, rds.RasterYSize)

        if nodata_value is not None:
            nodata_value = float(nodata_value)
            rb.SetNoDataValue(nodata_value)
        else:
            nodata_value = rb.GetNoDataValue()

    rbounds = raster_extent_as_bounds(rgt, rsize)

    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)

    if global_src_extent and raster_type == 'gdal':
        # create an in-memory numpy array of the source raster data
        # covering the whole extent of the vector layer
        if strategy != "ogr":
            raise RasterStatsError("global_src_extent requires OGR vector")

        # find extent of ALL features
        ds = ogr.Open(vectors)
        layer = ds.GetLayer(layer_num)
        ex = layer.GetExtent()
        # transform from OGR extent to xmin, ymin, xmax, ymax
        layer_extent = (ex[0], ex[2], ex[1], ex[3])

        global_src_offset = bbox_to_pixel_offsets(rgt, layer_extent)
        global_src_array = rb.ReadAsArray(*global_src_offset)
    elif global_src_extent and raster_type == 'ndarray':
        global_src_offset = (0, 0, raster.shape[0], raster.shape[1])
        global_src_array = raster

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    results = []

    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            geom = shape(feat['geometry'])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(buff).bounds)) 
                                for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        if geom_bounds[0] < rbounds[0]:
            geom_bounds[0] = rbounds[0]
        if geom_bounds[1] < rbounds[1]:
            geom_bounds[1] = rbounds[1]
        if geom_bounds[2] > rbounds[2]:
            geom_bounds[2] = rbounds[2]
        if geom_bounds[3] > rbounds[3]:
            geom_bounds[3] = rbounds[3]

        # calculate new geotransform of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)

        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
        else:
            if not global_src_extent:
                # use feature's source extent and read directly from source
                # fastest option when you have fast disks and well-indexed raster
                # advantage: each feature uses the smallest raster chunk
                # disadvantage: lots of disk reads on the source raster
                src_array = rb.ReadAsArray(*src_offset)
            else:
                # derive array from global source extent array
                # useful *only* when disk IO or raster format inefficiencies are your limiting factor
                # advantage: reads raster data in one pass before loop
                # disadvantage: large vector extents combined with big rasters need lotsa memory
                xa = src_offset[0] - global_src_offset[0]
                ya = src_offset[1] - global_src_offset[1]
                xb = xa + src_offset[2]
                yb = ya + src_offset[3]
                src_array = global_src_array[ya:yb, xa:xb]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create('rvds', src_offset[2], src_offset[3], 1, gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)
            
            if all_touched:
                gdal.RasterizeLayer(rvds, [1], mem_layer, None, None,
                                    burn_values=[1], options=['ALL_TOUCHED=True'])
            else:
                gdal.RasterizeLayer(rvds, [1], mem_layer, None, None,
                                    burn_values=[1], options=['ALL_TOUCHED=False'])
            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 to get the correct mask effect
            # we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(
                    src_array == nodata_value,
                    np.logical_not(rv_array)
                )
            )

            if run_count:
                pixel_count = Counter(masked.compressed())

            if categorical:  
                feature_stats = dict(pixel_count)
            else:
                feature_stats = {}

            if 'min' in stats:
                feature_stats['min'] = float(masked.min())
            if 'max' in stats:
                feature_stats['max'] = float(masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = float(masked.mean())
            if 'count' in stats:
                feature_stats['count'] = int(masked.count())
            # optional
            if 'sum' in stats:
                feature_stats['sum'] = float(masked.sum())
            if 'std' in stats:
                feature_stats['std'] = float(masked.std())
            if 'median' in stats:
                feature_stats['median'] = float(np.median(masked.compressed()))
            if 'majority' in stats:
                try:
                    feature_stats['majority'] = pixel_count.most_common(1)[0][0]
                except IndexError:
                    feature_stats['majority'] = None
            if 'minority' in stats:
                try:
                    feature_stats['minority'] = pixel_count.most_common()[-1][0]
                except IndexError:
                    feature_stats['minority'] = None
            if 'unique' in stats:
                feature_stats['unique'] = len(list(pixel_count.keys()))
            if 'range' in stats:
                try:
                    rmin = feature_stats['min']
                except KeyError:
                    rmin = float(masked.min())
                try:
                    rmax = feature_stats['max']
                except KeyError:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin
        
        # Use the enumerated id as __fid__
        feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in list(feat['properties'].items()):
                feature_stats[key] = val

        results.append(feature_stats)

    return results
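
Every example here leans on bbox_to_pixel_offsets to turn a bounding box into a (xoff, yoff, xsize, ysize) read window; the helper itself is defined elsewhere in the module. The sketch below shows what such a function typically looks like for a north-up raster; it is an assumed implementation for illustration, not necessarily the module's actual one:

def bbox_to_pixel_offsets(gt, bbox):
    # gt is a GDAL geotransform; assumes north-up (gt[2] == gt[4] == 0)
    origin_x, pixel_w = gt[0], gt[1]
    origin_y, pixel_h = gt[3], gt[5]          # pixel_h is negative (north-up)
    x1 = int((bbox[0] - origin_x) / pixel_w)
    x2 = int((bbox[2] - origin_x) / pixel_w) + 1
    y1 = int((bbox[3] - origin_y) / pixel_h)  # top edge maps to smallest row
    y2 = int((bbox[1] - origin_y) / pixel_h) + 1
    return (x1, y1, x2 - x1, y2 - y1)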
Exemplo n.º 35
0
def zonal_stats(vectors, raster, layer_num=0, band_num=1, nodata_value=None,
                global_src_extent=False, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None,
                affine=None, add_stats=None, raster_out=False, opt_georaster=False):
    """Summary statistics of a raster, broken out by vector geometries.

    Attributes
    ----------
    vectors : path to an OGR vector source or list of geo_interface or WKT str
    raster : ndarray or path to a GDAL raster source
        If ndarray is passed, the `transform` kwarg is required.
    layer_num : int, optional
        If `vectors` is a path to an OGR source, the vector layer to use
        (counting from 0).
        defaults to 0.
    band_num : int, optional
        If `raster` is a GDAL source, the band number to use (counting from 1).
        defaults to 1.
    nodata_value : float, optional
        If `raster` is a GDAL source, this value overrides any NODATA value
        specified in the file's metadata.
        If `None`, the file's metadata's NODATA value (if any) will be used.
        `ndarray`s don't support `nodata_value`.
        defaults to `None`.
    global_src_extent : bool, optional
        Pre-allocate entire raster before iterating over vector features.
        Use `True` if limited by disk IO or indexing into raster;
            requires sufficient RAM to store array in memory
        Use `False` with fast disks and a well-indexed raster, or when
        memory-constrained.
        Ignored when `raster` is an ndarray,
            because it is already completely in memory.
        defaults to `False`.
    categorical : bool, optional
    stats : list of str, or space-delimited str, optional
        Which statistics to calculate for each zone.
        All possible choices are listed in `VALID_STATS`.
        defaults to `DEFAULT_STATS`, a subset of these.
    copy_properties : bool, optional
        Include feature properties alongside the returned stats.
        defaults to `False`
    all_touched : bool, optional
        Whether to include every raster cell touched by a geometry, or only
        those having a center point within the polygon.
        defaults to `False`
    transform : list or tuple of 6 floats or Affine object, optional
        Required when `raster` is an ndarray.
        6-tuple for GDAL-style geotransform coordinates
        Affine for rasterio-style geotransform coordinates
        Can use the keyword `affine` which is an alias for `transform`
    add_stats : Dictionary with names and functions of additional statistics to
                compute, optional
    raster_out : Include the masked numpy array for each feature, optional
        Each feature dictionary will have the following additional keys:
            clipped raster (`mini_raster`)
            Geo-transform (`mini_raster_GT`)
            No Data Value (`mini_raster_NDV`)
    opt_georaster : bool, optional
        If `True`, return `mini_raster` as a georasters.GeoRaster
        instead of a plain masked array.
        defaults to `False`

    Returns
    -------
    list of dicts
        Each dict represents one vector geometry.
        Its keys include `__fid__` (the geometry feature id)
        and each of the `stats` requested.
    """
    if not stats:
        if not categorical:
            stats = DEFAULT_STATS
        else:
            stats = []
    else:
        if isinstance(stats, str):
            if stats in ['*', 'ALL']:
                stats = VALID_STATS
            else:
                stats = stats.split()
    for x in stats:
        if x.startswith("percentile_"):
            get_percentile(x)
        elif x not in VALID_STATS:
            raise ValueError(
                "Stat `%s` not valid; "
                "must be one of \n %r" % (x, VALID_STATS))

    if opt_georaster:
        import georasters

    run_count = False
    if categorical or 'majority' in stats or 'minority' in stats or \
       'unique' in stats:
        # run the counter once, only if needed
        run_count = True

    if isinstance(raster, np.ndarray):
        raster_type = 'ndarray'

        # must have transform info
        if affine:
            transform = affine
        if not transform:
            raise ValueError("Must provide the 'transform' kwarg "
                             "when using ndarrays as src raster")
        try:
            rgt = transform.to_gdal()  # an Affine object
        except AttributeError:
            rgt = transform  # a GDAL geotransform

        rshape = (raster.shape[1], raster.shape[0])

        # global_src_extent is implicitly turned on, array is already in memory
        global_src_extent = True

        if nodata_value:
            raise NotImplementedError("ndarrays don't support 'nodata_value'")
    else:
        raster_type = 'gdal'

        with rasterio.drivers():
            with rasterio.open(raster, 'r') as src:
                affine = src.affine
                rgt = affine.to_gdal()
                rshape = (src.width, src.height)
                rnodata = src.nodata

        if nodata_value is not None:
            # override with specified nodata
            nodata_value = float(nodata_value)
        else:
            nodata_value = rnodata

    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)

    if global_src_extent and raster_type == 'gdal':
        # create an in-memory numpy array of the source raster data
        extent = raster_extent_as_bounds(rgt, rshape)
        global_src_offset = bbox_to_pixel_offsets(rgt, extent, rshape)
        window = pixel_offsets_to_window(global_src_offset)
        with rasterio.drivers():
            with rasterio.open(raster, 'r') as src:
                global_src_array = src.read(
                    band_num, window=window, masked=False)
    elif global_src_extent and raster_type == 'ndarray':
        global_src_offset = (0, 0, raster.shape[0], raster.shape[1])
        global_src_array = raster

    results = []

    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            geom = shape(feat['geometry'])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(buff).bounds))
                                for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        geom_bounds = list(geom.bounds)

        # calculate new pixel coordinates of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds, rshape)

        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
        else:
            if not global_src_extent:
                # use feature's source extent and read directly from source
                window = pixel_offsets_to_window(src_offset)
                with rasterio.drivers():
                    with rasterio.open(raster, 'r') as src:
                        src_array = src.read(
                            band_num, window=window, masked=False)
            else:
                # subset feature array from global source extent array
                xa = src_offset[0] - global_src_offset[0]
                ya = src_offset[1] - global_src_offset[1]
                xb = xa + src_offset[2]
                yb = ya + src_offset[3]
                src_array = global_src_array[ya:yb, xa:xb]

            # create ndarray of rasterized geometry
            rv_array = rasterize_geom(geom, src_offset, new_gt, all_touched)
            assert rv_array.shape == src_array.shape

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 for the correct mask effect
            # we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(
                    src_array == nodata_value,
                    np.logical_not(rv_array)
                )
            )

            if run_count:
                pixel_count = Counter(masked.compressed().tolist())

            if categorical:
                feature_stats = dict(pixel_count)
            else:
                feature_stats = {}

            if 'min' in stats:
                feature_stats['min'] = float(masked.min())
            if 'max' in stats:
                feature_stats['max'] = float(masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = float(masked.mean())
            if 'count' in stats:
                feature_stats['count'] = int(masked.count())
            # optional
            if 'sum' in stats:
                feature_stats['sum'] = float(masked.sum())
            if 'std' in stats:
                feature_stats['std'] = float(masked.std())
            if 'median' in stats:
                feature_stats['median'] = float(np.median(masked.compressed()))
            if 'majority' in stats:
                try:
                    feature_stats['majority'] = float(pixel_count.most_common(1)[0][0])
                except IndexError:
                    feature_stats['majority'] = None
            if 'minority' in stats:
                try:
                    feature_stats['minority'] = float(pixel_count.most_common()[-1][0])
                except IndexError:
                    feature_stats['minority'] = None
            if 'unique' in stats:
                feature_stats['unique'] = len(list(pixel_count.keys()))
            if 'range' in stats:
                try:
                    rmin = feature_stats['min']
                except KeyError:
                    rmin = float(masked.min())
                try:
                    rmax = feature_stats['max']
                except KeyError:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin

            for pctile in [s for s in stats if s.startswith('percentile_')]:
                q = get_percentile(pctile)
                pctarr = masked.compressed()
                if pctarr.size == 0:
                    feature_stats[pctile] = None
                else:
                    feature_stats[pctile] = np.percentile(pctarr, q)

            if add_stats is not None:
                for stat_name, stat_func in add_stats.items():
                    feature_stats[stat_name] = stat_func(masked)
            if raster_out:
                masked.fill_value = nodata_value
                masked.data[masked.mask] = nodata_value
                if opt_georaster:
                    feature_stats['mini_raster'] = georasters.GeoRaster(
                        masked, new_gt, nodata_value=nodata_value,
                        projection=spatial_ref)
                else:
                    feature_stats['mini_raster'] = masked
                    feature_stats['mini_raster_GT'] = new_gt
                    feature_stats['mini_raster_NDV'] = nodata_value

        if 'fid' in feat:
            # Use the fid directly,
            # likely came from OGR data via .utils.feature_to_geojson
            feature_stats['__fid__'] = feat['fid']
        else:
            # Use the enumerated id
            feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in list(feat['properties'].items()):
                feature_stats[key] = val

        results.append(feature_stats)

    return results
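
# Usage sketch for the zonal_stats above (an added illustration, not part of the
# original source), assuming it accepts the ndarray-plus-`transform` calling
# convention documented in the later zonal_stats example: a GDAL-style
# geotransform tuple, built-in stats, a percentile stat and an `add_stats` entry.
import numpy as np

data = np.arange(100, dtype='float64').reshape(10, 10)
gt = (0.0, 1.0, 0.0, 10.0, 0.0, -1.0)  # origin top-left, 1x1 cells
zone = {'type': 'Polygon',
        'coordinates': [[(2.0, 2.0), (2.0, 8.0), (8.0, 8.0), (8.0, 2.0), (2.0, 2.0)]]}

stats = zonal_stats([zone], data, transform=gt,
                    stats=['count', 'mean', 'percentile_90'],
                    add_stats={'cv': lambda m: float(m.std() / m.mean())})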
Example No. 36
def zonal_stats(vectors, raster, layer_num=0, band_num=1, func=None,
                nodata_value=None, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None):

    if not stats:
        if not categorical:
            stats = ['count', 'min', 'max', 'mean', 'std']
            if func:
                stats.append('func')
        else:
            stats = []

    # must have transform arg
    if not transform:
        raise Exception("Must provide the 'transform' kwarg")
    rgt = transform
    rsize = (raster.shape[1], raster.shape[0])

    rbounds = raster_extent_as_bounds(rgt, rsize)
    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)
    # offsets are (xoff, yoff, xsize, ysize), so width comes from shape[1]
    global_src_offset = (0, 0, raster.shape[1], raster.shape[0])
    global_src_array = raster

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    results = []
    entity_images = []

    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            geom = shape(feat['geometry'])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(buff).bounds))
                                for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        if geom_bounds[0] < rbounds[0]:
            geom_bounds[0] = rbounds[0]
        if geom_bounds[1] < rbounds[1]:
            geom_bounds[1] = rbounds[1]
        if geom_bounds[2] > rbounds[2]:
            geom_bounds[2] = rbounds[2]
        if geom_bounds[3] > rbounds[3]:
            geom_bounds[3] = rbounds[3]

        # calculate new geotransform of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)

        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
            img = {'__fid__': i, 'img': None}
        else:
            # derive array from global source extent array
            # useful *only* when disk IO or raster format inefficiencies
            # are your limiting factor
            # advantage: reads raster data in one pass before loop
            # disadvantage: large vector extents combined with big rasters
            # need lotsa memory
            xa = src_offset[0] - global_src_offset[0]
            ya = src_offset[1] - global_src_offset[1]
            xb = xa + src_offset[2]
            yb = ya + src_offset[3]
            src_array = global_src_array[ya:yb, xa:xb]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create(
                'rvds', src_offset[2], src_offset[3], 1, gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)

            gdal.RasterizeLayer(
                rvds, [1], mem_layer, None, None,
                burn_values=[1],
                options=['ALL_TOUCHED=%s' % str(all_touched)])
            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 to get the correct mask effect
            # we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(
                    src_array == nodata_value,
                    np.logical_not(rv_array)
                )
            )

            feature_stats = {}

            if 'min' in stats:
                feature_stats['min'] = float(masked.min())
            if 'max' in stats:
                feature_stats['max'] = float(masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = float(masked.mean())
            if 'count' in stats:
                feature_stats['count'] = int(masked.count())
            if 'std' in stats:
                feature_stats['std'] = float(masked.std())
            # optional
            if 'func' in stats:
                feature_stats[func.__name__] = func(masked)
            if 'sum' in stats:
                feature_stats['sum'] = float(masked.sum())
            if 'median' in stats:
                feature_stats['median'] = float(np.median(masked.compressed()))
            if 'range' in stats:
                try:
                    rmin = feature_stats['min']
                except KeyError:
                    rmin = float(masked.min())
                try:
                    rmax = feature_stats['max']
                except KeyError:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin
            img = {'__fid__': i, 'img': masked}

        # Use the enumerated id as __fid__
        feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in list(feat['properties'].items()):
                feature_stats[key] = val

        results.append(feature_stats)
        entity_images.append(img)
    return results, entity_images
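
# Hedged usage sketch for this variant (`features`, `arr` and `gt` are
# illustrative names): the `func` kwarg registers one custom statistic,
# computed on each feature's masked array and stored under the function's
# __name__; per-feature masked images are returned alongside the stats.
def masked_ptp(masked):
    # peak-to-peak (max - min) of the pixels inside the zone
    return float(masked.max() - masked.min())

results, images = zonal_stats(features, arr, transform=gt, func=masked_ptp)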
Example No. 37
def zonal_stats(vectors, raster, layer=0, band_num=1, nodata_value=None,
                global_src_extent=False, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None, affine=None,
                add_stats=None, raster_out=False, category_map=None, **kwargs):
    """Summary statistics of a raster, broken out by vector geometries.

    Parameters
    ----------
    vectors : path to an OGR vector source or list of geo_interface or WKT str
    raster : ndarray or path to a GDAL raster source
        If ndarray is passed, the `transform` kwarg is required.
    layer : int or string, optional
        If `vectors` is a path to a fiona source,
        specify the vector layer to use either by name or number.
        defaults to 0
    band_num : int, optional
        If `raster` is a GDAL source, the band number to use (counting from 1).
        defaults to 1.
    nodata_value : float, optional
        If `raster` is a GDAL source, this value overrides any NODATA value
        specified in the file's metadata.
        If `None`, the file's metadata's NODATA value (if any) will be used.
        `ndarray`s don't support `nodata_value`.
        defaults to `None`.
    global_src_extent : bool, optional
        Pre-allocate entire raster before iterating over vector features.
        Use `True` if limited by disk IO or indexing into raster;
            requires sufficient RAM to store array in memory
        Use `False` with fast disks and a well-indexed raster, or when
        memory-constrained.
        Ignored when `raster` is an ndarray,
            because it is already completely in memory.
        defaults to `False`.
    categorical : bool, optional
        Return counts of each unique pixel value within the zone.
    stats : list of str, or space-delimited str, optional
        Which statistics to calculate for each zone.
        All possible choices are listed in `utils.VALID_STATS`.
        defaults to `DEFAULT_STATS`, a subset of these.
    copy_properties : bool, optional
        Include feature properties alongside the returned stats.
        defaults to `False`
    all_touched : bool, optional
        Whether to include every raster cell touched by a geometry, or only
        those having a center point within the polygon.
        defaults to `False`
    transform : list or tuple of 6 floats or Affine object, optional
        Required when `raster` is an ndarray.
        6-tuple for GDAL-style geotransform coordinates
        Affine for rasterio-style geotransform coordinates
        Can use the keyword `affine` which is an alias for `transform`
    add_stats : dict, optional
        Dictionary with names and functions of additional statistics to compute.
    raster_out : bool, optional
        Include the masked numpy array for each feature.
        Each feature dictionary will have the following additional keys:
            clipped raster (`mini_raster`)
            Geo-transform (`mini_raster_GT`)
            No Data Value (`mini_raster_NDV`)
    category_map : dict, optional
        A dictionary mapping raster values to human-readable categorical names.
        Only applies when categorical is True.

    Returns
    -------
    list of dicts
        Each dict represents one vector geometry.
        Its keys include `__fid__` (the geometry feature id)
        and each of the `stats` requested.
    """
    stats, run_count = check_stats(stats, categorical)

    rtype, rgt, rshape, global_src_extent, nodata_value = \
        raster_info(raster, global_src_extent, nodata_value, affine, transform)

    features_iter = read_features(vectors, layer)

    if global_src_extent and rtype == 'gdal':
        # create an in-memory numpy array of the source raster data
        extent = raster_extent_as_bounds(rgt, rshape)
        global_src_offset = bbox_to_pixel_offsets(rgt, extent, rshape)
        window = pixel_offsets_to_window(global_src_offset)
        with rasterio.drivers():
            with rasterio.open(raster, 'r') as src:
                global_src_array = src.read(
                    band_num, window=window, masked=False)
    elif global_src_extent and rtype == 'ndarray':
        # offsets are (xoff, yoff, xsize, ysize), so width comes from shape[1]
        global_src_offset = (0, 0, raster.shape[1], raster.shape[0])
        global_src_array = raster

    results = []

    for i, feat in enumerate(features_iter):
        geom = shape(feat['geometry'])

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        # TODO warning, suggest point_query instead
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(buff).bounds))
                                for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        geom_bounds = list(geom.bounds)

        # calculate new pixel coordinates of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds, rshape)

        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
        else:
            if not global_src_extent:
                # use feature's source extent and read directly from source
                window = pixel_offsets_to_window(src_offset)
                with rasterio.drivers():
                    with rasterio.open(raster, 'r') as src:
                        src_array = src.read(
                            band_num, window=window, masked=False)
            else:
                # subset feature array from global source extent array
                xa = src_offset[0] - global_src_offset[0]
                ya = src_offset[1] - global_src_offset[1]
                xb = xa + src_offset[2]
                yb = ya + src_offset[3]
                src_array = global_src_array[ya:yb, xa:xb]

            # create ndarray of rasterized geometry
            rv_array = rasterize_geom(geom, src_offset, new_gt, all_touched)
            assert rv_array.shape == src_array.shape

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 for the correct mask effect
            # we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(
                    src_array == nodata_value,
                    np.logical_not(rv_array)
                )
            )

            if masked.compressed().size == 0:
                # nothing here, fill with None and move on
                feature_stats = dict([(stat, None) for stat in stats])
                if 'count' in stats:  # special case, zero makes sense here
                    feature_stats['count'] = 0
            else:
                if run_count:
                    keys, counts = np.unique(masked.compressed(), return_counts=True)
                    pixel_count = dict(zip([k.item() for k in keys],
                                           [c.item() for c in counts]))

                if categorical:
                    feature_stats = dict(pixel_count)
                    if category_map:
                        feature_stats = remap_categories(category_map, feature_stats)
                else:
                    feature_stats = {}

                if 'min' in stats:
                    feature_stats['min'] = float(masked.min())
                if 'max' in stats:
                    feature_stats['max'] = float(masked.max())
                if 'mean' in stats:
                    feature_stats['mean'] = float(masked.mean())
                if 'count' in stats:
                    feature_stats['count'] = int(masked.count())
                # optional
                if 'sum' in stats:
                    feature_stats['sum'] = float(masked.sum())
                if 'std' in stats:
                    feature_stats['std'] = float(masked.std())
                if 'median' in stats:
                    feature_stats['median'] = float(np.median(masked.compressed()))
                if 'majority' in stats:
                    try:
                        feature_stats['majority'] = float(key_assoc_val(pixel_count, max))
                    except IndexError:
                        feature_stats['majority'] = None
                if 'minority' in stats:
                    try:
                        feature_stats['minority'] = float(key_assoc_val(pixel_count, min))
                    except IndexError:
                        feature_stats['minority'] = None
                if 'unique' in stats:
                    feature_stats['unique'] = len(list(pixel_count.keys()))
                if 'range' in stats:
                    try:
                        rmin = feature_stats['min']
                    except KeyError:
                        rmin = float(masked.min())
                    try:
                        rmax = feature_stats['max']
                    except KeyError:
                        rmax = float(masked.max())
                    feature_stats['range'] = rmax - rmin

                for pctile in [s for s in stats if s.startswith('percentile_')]:
                    q = get_percentile(pctile)
                    pctarr = masked.compressed()
                    if pctarr.size == 0:
                        feature_stats[pctile] = None
                    else:
                        feature_stats[pctile] = np.percentile(pctarr, q)

            if 'nodata' in stats:
                featmasked = np.ma.MaskedArray(src_array, mask=np.logical_not(rv_array))
                keys, counts = np.unique(featmasked.compressed(), return_counts=True)
                pixel_count = dict(zip([k.item() for k in keys],
                                       [c.item() for c in counts]))
                feature_stats['nodata'] = pixel_count.get(nodata_value, 0)

            if add_stats is not None:
                for stat_name, stat_func in add_stats.items():
                    feature_stats[stat_name] = stat_func(masked)

            if raster_out:
                masked.fill_value = nodata_value
                masked.data[masked.mask] = nodata_value
                feature_stats['mini_raster'] = masked
                feature_stats['mini_raster_GT'] = new_gt
                feature_stats['mini_raster_NDV'] = nodata_value

        if 'fid' in feat:
            # Use the fid directly,
            # likely came from OGR data via .utils.feature_to_geojson
            feature_stats['__fid__'] = feat['fid']
        else:
            # Use the enumerated id
            feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in list(feat['properties'].items()):
                feature_stats[key] = val

        results.append(feature_stats)

    return results
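
# Hedged usage sketch for the API documented above: categorical counts remapped
# to human-readable names via category_map ('zones.shp', 'landcover.tif' and
# the class values are illustrative).
landcover_names = {1: 'forest', 2: 'water', 3: 'urban'}
cat_stats = zonal_stats('zones.shp', 'landcover.tif',
                        categorical=True, category_map=landcover_names,
                        stats='count majority')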
Example No. 38
print(elapsed_time_fl)  # 0.2 sec for 3 particles

## Remove loop
import time

import fiona
from shapely.geometry import Polygon, MultiPolygon, Point

polyShp = fiona.open('./habitat/rock_lobster_polygons_fixed.shp')

polyList = []
polyProperties = []
for poly in polyShp:
    polyGeom = Polygon(poly['geometry']['coordinates'][0])
    polyList.append(polyGeom)
    polyProperties.append(poly['properties'])
#print(polyList[10])
#print(polyProperties[10])
multiShp = MultiPolygon(polyList)
multiShp = multiShp.buffer(0)
#print(multiShp.is_valid)
#print(type(multiShp))

start = time.time()
# lons/lats (particle positions) are assumed to be defined earlier
for i in range(len(lons)):
    pt = Point(lons[i], lats[i])
    in_area = pt.within(multiShp)
    if in_area:
        print("In habitat")
    else:
        print("No habitat")
# get time taken to run
elapsed_time_fl = (time.time() - start)
print(elapsed_time_fl)  # 0.01 sec for the 3 particles
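
# A further speed-up worth noting (an addition, not in the original snippet):
# shapely's prepared geometries cache an index over multiShp, so repeated
# containment tests against many points become cheaper still.
from shapely.prepared import prep

prepared_shp = prep(multiShp)
for i in range(len(lons)):
    pt = Point(lons[i], lats[i])
    print("In habitat" if prepared_shp.contains(pt) else "No habitat")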
Example No. 39
    def _process_element(self, element):
        if not bool(element):
            return element.clone(crs=self.p.projection)

        crs = element.crs
        proj = self.p.projection
        if (isinstance(crs, ccrs.PlateCarree) and not isinstance(proj, ccrs.PlateCarree)
            and crs.proj4_params['lon_0'] != 0):
            element = self.instance(projection=ccrs.PlateCarree())(element)

        if isinstance(proj, ccrs.CRS) and not isinstance(proj, ccrs.Projection):
            raise ValueError('invalid transform: spherical contouring is not '
                             'supported - consider using PlateCarree/RotatedPole.')

        if isinstance(element, Polygons):
            geoms = polygons_to_geom_dicts(element, skip_invalid=False)
        else:
            geoms = path_to_geom_dicts(element, skip_invalid=False)

        projected = []
        for path in geoms:
            geom = path['geometry']

            # Ensure minimum area for polygons (precision issues cause errors)
            if isinstance(geom, Polygon) and geom.area < 1e-15:
                continue
            elif isinstance(geom, MultiPolygon):
                polys = [g for g in geom.geoms if g.area > 1e-15]
                if not polys:
                    continue
                geom = MultiPolygon(polys)
            elif (not geom or isinstance(geom, GeometryCollection)):
                continue

            proj_geom = proj.project_geometry(geom, element.crs)

            # Attempt to fix geometry without being noisy about it
            logger = logging.getLogger()
            prev = logger.level
            logger.setLevel(logging.ERROR)
            try:
                if not proj_geom.is_valid:
                    proj_geom = proj.project_geometry(geom.buffer(0), element.crs)
            except Exception:
                continue
            finally:
                logger.setLevel(prev)
            if proj_geom.geom_type == 'GeometryCollection' and proj_geom.is_empty:
                continue
            data = dict(path, geometry=proj_geom)
            if 'holes' in data:
                data.pop('holes')
            projected.append(data)

        if len(geoms) and len(projected) == 0:
            self.warning('While projecting a %s element from a %s coordinate '
                         'reference system (crs) to a %s projection none of '
                         'the projected paths were contained within the bounds '
                         'specified by the projection. Ensure you have specified '
                         'the correct coordinate system for your data.' %
                         (type(element).__name__, type(element.crs).__name__,
                          type(self.p.projection).__name__))

        # Try casting back to original types
        if element.interface is GeoPandasInterface:
            import geopandas as gpd
            projected = gpd.GeoDataFrame(projected, columns=element.data.columns)
        elif element.interface is MultiInterface:
            x, y = element.kdims
            item = element.data[0] if element.data else None
            if item is None or (isinstance(item, dict) and 'geometry' in item):
                return element.clone(projected, crs=self.p.projection)
            projected = [geom_dict_to_array_dict(p, [x.name, y.name]) for p in projected]
            if any('holes' in p for p in projected):
                pass
            elif pd and isinstance(item, pd.DataFrame):
                projected = [pd.DataFrame(p, columns=item.columns) for p in projected]
            elif isinstance(item, np.ndarray):
                projected = [np.column_stack([p[d.name] for d in element.dimensions()])
                             for p in projected]
        return element.clone(projected, crs=self.p.projection)
Example No. 40
def raster_stats_multi(vectors, rasterlist, geom_attr='GeomWKT', id_attr='fid',
                       band_num=1, nodata_value=None,
                       global_src_extent=False, categorical=False, stats=None,
                       copy_properties=False, all_touched=False):
    '''
    Multi-raster version of the raster_stats (zonal_stats) function found in the
    rasterstats package.

    When running zonal stats with the rasterstats package, each feature (zone)
    must first be rasterized; the result is then used to mask the input raster.
    However, we often need to run raster stats on many (thousands of) input
    rasters (all with identical geotransforms) for the same zones.

    In this scenario the rasterization of the zones is a major overhead.
    This version rasterizes once and then runs the overlay against all rasters
    (which must have the same resolution / extent as one another). It returns a
    generator, so the stats for each raster are produced when the calling code
    is ready for them.
    '''
    DEFAULT_STATS = ['count', 'min', 'max', 'mean']
    VALID_STATS = DEFAULT_STATS + \
        ['sum', 'std', 'median', 'majority', 'minority', 'unique', 'range']
    if not stats:
        if not categorical:
            stats = DEFAULT_STATS
        else:
            stats = []
    else:
        if isinstance(stats, str):
            if stats in ['*', 'ALL']:
                stats = VALID_STATS
            else:
                stats = stats.split()
    for x in stats:
        if x not in VALID_STATS:
            raise RasterStatsError("Stat `%s` not valid;" \
                " must be one of \n %r" % (x, VALID_STATS))

    run_count = False
    if categorical or 'majority' in stats or 'minority' in stats or \
       'unique' in stats:
        # run the counter once, only if needed
        run_count = True
    
    # open the first raster and use this, we will assume they are all the same size / bounds etc
    initrast = rasterlist[0]
    rds = gdal.Open(initrast, gdal.GA_ReadOnly)
    if not rds:
        raise RasterStatsError("Cannot open %r as GDAL raster" % raster)
    rb = rds.GetRasterBand(band_num)
    rgt = rds.GetGeoTransform()
    rsize = (rds.RasterXSize, rds.RasterYSize)
    rbounds = raster_extent_as_bounds(rgt, rsize)

    if nodata_value is not None:
        nodata_value = float(nodata_value)
        rb.SetNoDataValue(nodata_value)
    else:
        nodata_value = rb.GetNoDataValue()

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    results = []

    # To avoid re-rasterizing the zones for every values raster, the
    # rasterization is moved out of the loop and the rasterized zone arrays are
    # saved into a dictionary (so we need enough memory to hold them all)
    zoneFeatureRasters = {}
    globL = float('inf')
    globB = float('inf')
    globT = float('-inf')
    globR = float('-inf')
    
    for i, feat in enumerate(vectors):
        try:
            geomWKT = feat[geom_attr]
        except KeyError:
            print("No geom attr found in feature!")
            continue
        geom = wkt.loads(geomWKT)
        
        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(buff).bounds)) 
                                for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        if geom_bounds[0] < rbounds[0]:
            geom_bounds[0] = rbounds[0]
        if geom_bounds[1] < rbounds[1]:
            geom_bounds[1] = rbounds[1]
        if geom_bounds[2] > rbounds[2]:
            geom_bounds[2] = rbounds[2]
        if geom_bounds[3] > rbounds[3]:
            geom_bounds[3] = rbounds[3]
        
        # Record the overall bounds of the features
        if geom_bounds[0] < globL:
            globL = geom_bounds[0]
        if geom_bounds[1] < globB:
            globB = geom_bounds[1]
        if geom_bounds[2] > globR:
            globR = geom_bounds[2]
        if geom_bounds[3] > globT:
            globT = geom_bounds[3]
            
        # calculate new geotransform of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds, rsize)

        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )
        fid = None
        try:
            fid = feat[id_attr]
        except KeyError:
            fid = i
        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            print("Feature %s is off raster extent - skipping!" % fid)
            zoneFeatureRasters[fid] = None
        else:
            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', None, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create('rvds', src_offset[2], src_offset[3], 1, gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)
            gdal.RasterizeLayer(
                rvds, [1], mem_layer, None, None,
                burn_values=[1], options=['ALL_TOUCHED=%s' % str(all_touched)])
            rv_array = rvds.ReadAsArray()
            zoneFeatureRasters[fid] = {
                 "zonearray":rv_array,
                 "src_offset":src_offset
            }
            
    # release the handles on the first raster; it is reopened per-raster below
    rb = None
    rds = None

    if global_src_extent:
        # outside the loop: everything except actually reading the raster data
        # create an in-memory numpy array of the source raster data
        # covering the whole extent of the vector layer
        layer_extent = (globL, globB, globR, globT)
        global_src_offset = bbox_to_pixel_offsets(rgt, layer_extent, rsize)
            
    # now do the raster-calculation part of the original task once for each
    # input raster, getting the zone rasters from the populated dictionary
    # rather than re-rasterizing each time
    for rast in rasterlist:
        rastresults = []
        rds = gdal.Open(rast, gdal.GA_ReadOnly)
        if not rds:
            print("Cannot open %r as GDAL raster - skipping!" % rast)
            continue
        rb = rds.GetRasterBand(band_num)
        # check this raster's size and geotransform against the first one
        thisRgt = rds.GetGeoTransform()
        thisRsize = (rds.RasterXSize, rds.RasterYSize)
        thisRbounds = raster_extent_as_bounds(thisRgt, thisRsize)
        if (thisRgt != rgt or thisRsize != rsize or thisRbounds != rbounds):
            print "Raster " + rast +" has differing size or geotransform from others - skipping!"
            continue

        if global_src_extent:
            global_src_array = rb.ReadAsArray(*global_src_offset)

        if nodata_value is not None:
            nodata_value = float(nodata_value)
            rb.SetNoDataValue(nodata_value)
        else:
            nodata_value = rb.GetNoDataValue()
       
        for i, feat in enumerate(vectors):
            fid = None
            try:
                fid = feat[id_attr]
            except KeyError:
                fid = i
            if zoneFeatureRasters[fid] is None:
                # this happens when the feature was outside the raster extent so rasterizing it was skipped
                #feature_stats = dict([(s,None) for s in stats])
                continue
            else:
                zone_array = zoneFeatureRasters[fid]["zonearray"]
                src_offset = zoneFeatureRasters[fid]["src_offset"]
                if not global_src_extent:
                    # use feature's source extent and read directly from source
                    # fastest option when you have fast disks and well-indexed raster
                    # advantage: each feature uses the smallest raster chunk
                    # disadvantage: lots of disk reads on the source raster
                    src_array = rb.ReadAsArray(*src_offset)
                else:
                    # derive array from global source extent array
                    # useful *only* when disk IO or raster format inefficiencies are your limiting factor
                    # advantage: reads raster data in one pass before loop
                    # disadvantage: large vector extents combined with big rasters need lotsa memory
                    xa = src_offset[0] - global_src_offset[0]
                    ya = src_offset[1] - global_src_offset[1]
                    xb = xa + src_offset[2]
                    yb = ya + src_offset[3]
                    src_array = global_src_array[ya:yb, xa:xb]
                
                # Mask the source data array with our current feature
                # we take the logical_not to flip 0<->1 to get the correct mask effect
                # we also mask out nodata values explictly
                masked = numpy.ma.MaskedArray(
                    src_array,
                    mask=numpy.logical_or(
                        src_array == nodata_value,
                        numpy.logical_not(zone_array)
                    )
                )

                if run_count:
                    pixel_count = Counter(masked.compressed().tolist())

                if categorical:  
                    feature_stats = dict(pixel_count)
                else:
                    feature_stats = {}

                if 'min' in stats:
                    feature_stats['min'] = float(masked.min())
                if 'max' in stats:
                    feature_stats['max'] = float(masked.max())
                if 'mean' in stats:
                    feature_stats['mean'] = float(masked.mean())
                if 'count' in stats:
                    feature_stats['count'] = int(masked.count())
                # optional
                if 'sum' in stats:
                    feature_stats['sum'] = float(masked.sum())
                if 'std' in stats:
                    feature_stats['std'] = float(masked.std())
                if 'median' in stats:
                    feature_stats['median'] = float(numpy.median(masked.compressed()))
                if 'majority' in stats:
                    try:
                        feature_stats['majority'] = pixel_count.most_common(1)[0][0]
                    except IndexError:
                        feature_stats['majority'] = None
                if 'minority' in stats:
                    try:
                        feature_stats['minority'] = pixel_count.most_common()[-1][0]
                    except IndexError:
                        feature_stats['minority'] = None
                if 'unique' in stats:
                    feature_stats['unique'] = len(pixel_count.keys())
                if 'range' in stats:
                    try:
                        rmin = feature_stats['min']
                    except KeyError:
                        rmin = float(masked.min())
                    try:
                        rmax = feature_stats['max']
                    except KeyError:
                        rmax = float(masked.max())
                    feature_stats['range'] = rmax - rmin
        
            try:
                # Use the provided feature id
                feature_stats[id_attr] = feat[id_attr]
            except KeyError:
                # fall back to the enumerator
                feature_stats[id_attr] = i

            if copy_properties:
                for key, val in feat.items():
                    if key in (id_attr, geom_attr):
                        continue
                    feature_stats[key] = val
            rastresults.append(feature_stats)
        yield {'rastername': rast, 'stats': rastresults}
    rb = None
    rds = None
    zoneFeatureRasters = None
    ds = None
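
# Hedged usage sketch: raster_stats_multi is a generator, so each raster's
# stats are produced lazily as the caller iterates (names and paths below are
# illustrative).
features = [{'fid': 1, 'GeomWKT': 'POLYGON ((2 2, 2 8, 8 8, 8 2, 2 2))'}]
rasters = ['rain_2001.tif', 'rain_2002.tif']

for result in raster_stats_multi(features, rasters, stats='mean count'):
    print(result['rastername'], result['stats'])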