def test_nocrs_wgs84_geojson(self):
    vector_io = VectorFileIO(
        uri=os.path.join(testfile_path, 'iraq_hospitals.geojson'))
    raw_json = json.loads(vector_io.read(format=geo.formats.JSON))
    # A GeoJSON file with no 'crs' member should be treated as EPSG:4326.
    # (raw_json is a dict, so check for the key rather than an attribute.)
    self.assertNotIn('crs', raw_json)
    epsg = vector_io.get_epsg()
    self.assertEquals(epsg, 4326)
def __init__(self, **kwargs): """ Create an instance of LeastCostProcess class. :param kwargs: """ super(LeastCostProcess, self).__init__(**kwargs) if not self.output: self.output = VectorFileIO(name='result', uri=self.get_outpath()) self.validate()
def test_validationInputsMax(self):
    """
    Test the GaiaProcess.validate() function - fail on > max input types
    """
    vector_io1 = VectorFileIO(uri='/fake/path')
    vector_io2 = VectorFileIO(uri='/fake/path')
    with self.assertRaises(geo.GaiaException) as ge:
        geo.LengthProcess(inputs=[vector_io1, vector_io2])
    self.assertIn('Incorrect # of inputs; expected 1', str(ge.exception))
def test_mercator_geojson(self):
    vector_io = VectorFileIO(
        uri=os.path.join(testfile_path, 'iraq_hospitals_3857.json'))
    self.assertEquals(vector_io.get_epsg(), 3857)
    jsonwm = json.loads(vector_io.read(format=geo.formats.JSON))
    self.assertEquals(jsonwm['crs']['properties']['name'], 'EPSG:3857')
    self.assertEquals(jsonwm['features'][0]['geometry']['coordinates'],
                      [4940150.544527022, 3941210.867854486])
    json84 = json.loads(vector_io.read(format=geo.formats.JSON, epsg=4326))
    self.assertEquals(json84['crs']['properties']['name'], 'EPSG:4326')
    self.assertEquals(json84['features'][0]['geometry']['coordinates'],
                      [44.378127400000004, 33.34517919999999])
def test_within(self):
    """
    Test WithinProcess for vector inputs
    """
    vector1_io = VectorFileIO(
        uri=os.path.join(testfile_path, 'iraq_hospitals.geojson'))
    vector2_io = VectorFileIO(
        uri=os.path.join(testfile_path, 'baghdad_districts.geojson'))
    process = geo.WithinProcess(inputs=[vector1_io, vector2_io])
    try:
        process.compute()
        self.assertEquals(len(process.output.data), 19)
    finally:
        if process:
            process.purge()
def test_within_reproject(self):
    """
    Test WithinProcess for vector inputs, where output should be in the
    same projection as the first input (in this case, 3857).
    """
    vector1_io = VectorFileIO(
        uri=os.path.join(testfile_path, 'iraq_hospitals_3857.json'))
    vector2_io = VectorFileIO(
        uri=os.path.join(testfile_path, 'baghdad_districts.geojson'))
    process = geo.WithinProcess(inputs=[vector1_io, vector2_io])
    try:
        process.compute()
        self.assertEquals(process.output.data.crs, {'init': u'epsg:3857'})
        self.assertEquals(len(process.output.data), 19)
    finally:
        if process:
            process.purge()
def test_validationInputsMin(self):
    """
    Test the GaiaProcess.validate() function - fail on < minimum input types
    """
    vector_io = VectorFileIO(uri='/fake/path1')
    with self.assertRaises(geo.GaiaException) as ge:
        geo.IntersectsProcess(inputs=[vector_io])
    self.assertIn('Not enough inputs for process', str(ge.exception))
def test_validationInputsPass(self):
    """
    Test the GaiaProcess.validate() function - pass on valid input
    """
    raster_io = RasterFileIO(uri='/fake/path')
    vector_io = VectorFileIO(uri='/fake/path')
    try:
        geo.ZonalStatsProcess(inputs=[raster_io, vector_io])
    except geo.GaiaException:
        self.fail(
            "ZonalStatsProcess should have passed validation but did not")
def test_validationInputsOrder(self):
    """
    Test the GaiaProcess.validate() function - fail on incorrect order
    """
    raster_io = RasterFileIO(uri='/fake/path1')
    vector_io = VectorFileIO(uri='/fake/path2')
    with self.assertRaises(geo.GaiaException) as ge:
        geo.ZonalStatsProcess(inputs=[vector_io, raster_io])
    self.assertIn('Input #1 is of incorrect type.', str(ge.exception))
def test_distance(self):
    """
    Test DistanceProcess for vector inputs
    """
    vector1_io = VectorFileIO(
        uri=os.path.join(testfile_path, 'baghdad_districts.geojson'))
    vector2_io = VectorFileIO(
        uri=os.path.join(testfile_path, 'iraq_hospitals.geojson'))
    process = geo.DistanceProcess(inputs=[vector1_io, vector2_io])
    try:
        process.compute()
        with open(os.path.join(
                testfile_path, 'distance_process_results.json')) as exp:
            expected_json = json.load(exp)
        actual_json = json.loads(
            process.output.read(format=geo.formats.JSON))
        self.assertEquals(len(expected_json['features']),
                          len(actual_json['features']))
    finally:
        if process:
            process.purge()
class ZonalStatsProcess(GaiaProcess):
    """
    Calculates statistical values from a raster dataset for each polygon
    in a vector dataset.
    """

    #: List of required inputs; name, type, max # of each; None = no max
    required_inputs = [
        {'description': 'Raster image',
         'type': types.RASTER,
         'max': 1
         },
        {'description': 'Zones',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(ZonalStatsProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def compute(self):
        """
        Run the process
        """
        self.output.create_output_dir(self.output.uri)
        features = gdal_zonalstats(
            self.inputs[1].read(format=formats.JSON,
                                epsg=self.inputs[0].get_epsg()),
            self.inputs[0].read())
        self.output.data = GeoDataFrame.from_features(features)
        self.output.write()
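# A minimal usage sketch for ZonalStatsProcess; the file paths below are
# hypothetical, and the input order (raster first, then zones) matches the
# required_inputs above and the test_validationInputsPass test:
#
#   raster = RasterFileIO(uri='/data/elevation.tif')
#   zones = VectorFileIO(uri='/data/zones.geojson')
#   process = ZonalStatsProcess(inputs=[raster, zones])
#   process.compute()
#   stats = process.output.data  # GeoDataFrame of per-zone statistics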
def test_union(self):
    """
    Test UnionProcess for vector inputs
    """
    vector1_io = VectorFileIO(
        uri=os.path.join(testfile_path, 'baghdad_districts.geojson'),
        filters=[('NNAME', 'contains', '^A')])
    vector2_io = VectorFileIO(
        uri=os.path.join(testfile_path, 'baghdad_districts.geojson'),
        filters=[('NNAME', 'contains', '^B')])
    process = geo.UnionProcess(inputs=[vector1_io, vector2_io])
    try:
        process.compute()
        with open(os.path.join(
                testfile_path, 'union_process_results.json')) as exp:
            expected_json = json.load(exp)
        actual_json = json.loads(
            process.output.read(format=geo.formats.JSON))
        self.assertEquals(len(expected_json['features']),
                          len(actual_json['features']))
    finally:
        if process:
            process.purge()
def test_length(self):
    """
    Test LengthProcess for vector inputs
    """
    vector_roads = VectorFileIO(
        uri=os.path.join(testfile_path, 'iraq_roads.geojson'),
        filters=[('type', '=', 'motorway'), ('bridge', '=', 1)])
    process = geo.LengthProcess(inputs=[vector_roads])
    try:
        process.compute()
        with open(os.path.join(
                testfile_path, 'length_process_results.json')) as exp:
            expected_json = json.load(exp)
        actual_json = json.loads(
            process.output.read(format=geo.formats.JSON))
        self.assertEquals(len(expected_json['features']),
                          len(actual_json['features']))
    finally:
        if process:
            process.purge()
def test_subset_raster(self):
    """
    Test SubsetProcess for vector & raster inputs
    """
    zipfile = ZipFile(os.path.join(testfile_path, '2states.zip'), 'r')
    zipfile.extract('2states.geojson', testfile_path)
    vector_io = VectorFileIO(
        uri=os.path.join(testfile_path, '2states.geojson'))
    raster_io = RasterFileIO(
        uri=os.path.join(testfile_path, 'globalairtemp.tif'))
    process = geo.SubsetProcess(inputs=[raster_io, vector_io])
    try:
        process.compute()
        self.assertEquals(type(process.output.data).__name__, 'Dataset')
        self.assertTrue(os.path.exists(process.output.uri))
        self.assertIsNotNone(process.id)
        self.assertIn(process.id, process.output.uri)
    finally:
        testfile = os.path.join(testfile_path, '2states.geojson')
        if os.path.exists(testfile):
            os.remove(testfile)
        if process:
            process.purge()
class LengthProcess(GaiaProcess):
    """
    Calculate the length of each feature in a dataset.
    If the dataset projection is not in metric units, it will be
    temporarily reprojected to EPSG:3857 to calculate the length.
    """

    #: List of required inputs; name, type, max # of each; None = no max
    required_inputs = [
        {'description': 'Line/Polygon dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(LengthProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate lengths using pandas

        :return: Result as a GeoDataFrame
        """
        featureio = self.inputs[0]
        original_projection = featureio.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        feature_df = GeoDataFrame.copy(featureio.read(epsg=epsg))
        feature_df['length'] = feature_df.geometry.length
        if original_projection:
            feature_df[feature_df.geometry.name] = feature_df.geometry.to_crs(
                epsg=original_projection)
            feature_df.crs = fiona.crs.from_epsg(original_projection)
        return feature_df

    def calc_postgis(self):
        """
        Calculate lengths using PostGIS

        :return: Result as a GeoDataFrame
        """
        featureio = self.inputs[0]
        geom0, epsg = featureio.geom_column, featureio.epsg
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(epsg)
        geom_query = geom0
        geometry_type = featureio.geometry_type
        length_func = 'ST_Perimeter' if 'POLYGON' in geometry_type.upper() \
            else 'ST_Length'
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            geom_query = 'ST_Transform({}, {})'.format(geom_query, 3857)
        geom_query = ', {}({}) as length'.format(length_func, geom_query)
        query, params = featureio.get_query()
        query = query.replace('FROM', '{} FROM'.format(geom_query))
        logger.debug(query)
        return df_from_postgis(featureio.engine, query, params, geom0, epsg)

    def compute(self):
        """
        Run the length process
        """
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
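# A usage sketch for LengthProcess; the path is hypothetical, and the
# optional filters kwarg mirrors the VectorFileIO filter syntax used in
# test_length above:
#
#   roads = VectorFileIO(uri='/data/roads.geojson',
#                        filters=[('type', '=', 'motorway')])
#   process = LengthProcess(inputs=[roads])
#   process.compute()  # output features gain a 'length' column in meters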
class NearProcess(GaiaProcess):
    """
    Takes two vector inputs; the second is assumed to contain a single
    (point) feature. Requires a 'distance' argument in meters. If inputs
    are not in a metric projection they will be reprojected to EPSG:3857.
    Returns the features in the first input within the specified distance
    of the point in the second input.
    """

    #: List of required inputs; name, type, max # of each; None = no max
    required_inputs = [
        {'description': 'Features',
         'type': types.VECTOR,
         'max': 1
         },
        {'description': 'Point',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Required arguments, data types as dict
    required_args = [{
        'name': 'distance',
        'title': 'Distance',
        'description': 'Distance to search for features, in meters',
        'type': float
    }]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, distance=None, **kwargs):
        super(NearProcess, self).__init__(**kwargs)
        self.distance = distance
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculates the features within the specified distance using pandas

        :return: results as a GeoDataFrame
        """
        features = self.inputs[0]
        original_projection = self.inputs[0].get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        features_df = features.read(epsg=epsg)
        features_gs = features_df.geometry
        point_df = self.inputs[1].read(epsg=epsg)[:1]
        point_gs = point_df.geometry
        features_length = len(features_gs)
        min_dist = np.empty(features_length)
        for i, feature in enumerate(features_gs):
            min_dist[i] = feature.distance(point_gs[0])
        nearby_df = GeoDataFrame.copy(features_df)
        nearby_df['distance'] = min_dist
        distance_max = self.distance
        nearby_df = nearby_df[(nearby_df['distance'] <= distance_max)]\
            .sort_values('distance')
        if original_projection:
            nearby_df[nearby_df.geometry.name] = \
                nearby_df.geometry.to_crs(epsg=original_projection)
        return nearby_df

    def calc_postgis(self):
        """
        Calculates the features within the specified distance using PostGIS
        via a DWithin plus K-Nearest Neighbor (KNN) query

        :return: results as a GeoDataFrame
        """
        featureio = self.inputs[0]
        pointio = self.inputs[1]
        feature_geom, epsg = featureio.geom_column, featureio.epsg
        point_json = json.loads(pointio.read(
            format=formats.JSON))['features'][0]
        point_epsg = pointio.get_epsg()
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(epsg))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        io_query, params = featureio.get_query()
        point_geom = 'ST_Transform(ST_SetSRID(ST_GeomFromGeoJSON(\'' \
                     '{geojson}\'),{point_epsg}), {epsg})'.\
            format(geojson=json.dumps(point_json['geometry']),
                   point_epsg=point_epsg, epsg=epsg)
        dist1 = """, (SELECT ST_Distance(
                ST_Transform({table0}.{geom0},{epsg}),
                ST_Transform(point, {epsg}))
                FROM {point_geom} as point
                ORDER BY {table0}.{geom0} <#> point LIMIT 1)
                as distance FROM """.format(table0=featureio.table,
                                            geom0=feature_geom,
                                            point_geom=point_geom,
                                            epsg=epsg)
        dist2 = """ WHERE ST_DWithin({point_geom},
                ST_Transform({table0}.{geom0},{epsg}), {distance})
                """.format(table0=featureio.table,
                           geom0=feature_geom,
                           point_geom=point_geom,
                           epsg=epsg,
                           distance=self.distance)
        dist3 = ' ORDER BY distance ASC'
        query = re.sub('FROM', dist1, io_query).rstrip(';')
        if 'WHERE' in query:
            query = re.sub('WHERE', dist2 + ' AND ', query)
        else:
            query += dist2
        query += dist3
        logger.debug(query)
        return df_from_postgis(featureio.engine, query, params,
                               feature_geom, epsg)

    def compute(self):
        """
        Run the process
        """
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
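# A usage sketch for NearProcess (hypothetical paths): the first input is
# the feature set to search, the second supplies the reference point, and
# 'distance' is in meters:
#
#   hospitals = VectorFileIO(uri='/data/hospitals.geojson')
#   site = VectorFileIO(uri='/data/site_point.geojson')
#   process = NearProcess(inputs=[hospitals, site], distance=5000.0)
#   process.compute()  # features within 5 km, sorted by distance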
class DistanceProcess(GaiaProcess):
    """
    Calculates the minimum distance from each feature of the first
    dataset to the nearest feature of the second dataset.
    """

    #: List of required inputs; name, type, max # of each; None = no max
    required_inputs = [
        {'description': 'From dataset',
         'type': types.VECTOR,
         'max': 1
         },
        {'description': 'To dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(DistanceProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate the minimum distance between features using pandas
        GeoDataFrames.

        :return: Minimum distance results as a GeoDataFrame
        """
        first = self.inputs[0]
        original_projection = first.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        first_df = first.read(epsg=epsg)
        first_gs = first_df.geometry
        first_length = len(first_gs)
        second_df = self.inputs[1].read(epsg=epsg)
        second_gs = second_df.geometry
        min_dist = np.empty(first_length)
        for i, first_features in enumerate(first_gs):
            min_dist[i] = np.min([first_features.distance(second_features)
                                  for second_features in second_gs])
        distance_df = GeoDataFrame.copy(first_df)
        distance_df['distance'] = min_dist
        distance_df.sort_values('distance', inplace=True)
        if original_projection:
            distance_df[distance_df.geometry.name] = \
                distance_df.geometry.to_crs(epsg=original_projection)
        return distance_df

    def calc_postgis(self):
        """
        Calculate the minimum distance between features using a PostGIS
        K-Nearest Neighbor (KNN) query

        :return: Minimum distance results as a GeoDataFrame
        """
        diff_queries = []
        diff_params = []
        first = self.inputs[0]
        geom0, epsg = first.geom_column, first.epsg
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(epsg))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            diff_queries.append(io_query.rstrip(';'))
            diff_params.insert(0, params)
        diff_params = [item for x in diff_params for item in x]
        dist1 = """, (SELECT ST_Distance(
                ST_Transform({table0}.{geom0},{epsg}),
                ST_Transform(query2.{geom1},{epsg})) as distance
                """.format(table0=self.inputs[0].table,
                           geom0=geom0,
                           geom1=geom1,
                           epsg=epsg)
        dist2 = """ ORDER BY {table0}.{geom0} <#> query2.{geom1}
                LIMIT 1) FROM """.format(table0=self.inputs[0].table,
                                         geom0=geom0,
                                         geom1=geom1,
                                         epsg=epsg)
        dist3 = ' ORDER BY distance ASC'
        query = re.sub('FROM',
                       dist1 + ' FROM (' + diff_queries[1] +
                       ') as query2 ' + dist2,
                       diff_queries[0]) + dist3
        return df_from_postgis(first.engine, query, diff_params, geom0, epsg)

    def compute(self):
        """
        Run the distance process
        """
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
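# A usage sketch for DistanceProcess (hypothetical paths): each feature of
# the first dataset gets a 'distance' column holding its minimum distance
# to the second dataset, sorted ascending (compare test_distance above):
#
#   districts = VectorFileIO(uri='/data/districts.geojson')
#   hospitals = VectorFileIO(uri='/data/hospitals.geojson')
#   process = DistanceProcess(inputs=[districts, hospitals])
#   process.compute()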
class CentroidProcess(GaiaProcess):
    """
    Calculates the centroid point of a vector dataset.
    """

    #: List of required inputs; name, type, max # of each; None = no max
    required_inputs = [
        {'description': 'Line/Polygon dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    optional_args = [{
        'name': 'combined',
        'title': 'Combined',
        'description': 'Get centroid of features combined (default False)',
        'type': bool,
    }]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, combined=False, **kwargs):
        super(CentroidProcess, self).__init__(**kwargs)
        self.combined = combined
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate the centroid using pandas GeoDataFrames

        :return: centroid as a GeoDataFrame
        """
        df_in = self.inputs[0].read()
        df = GeoDataFrame(df_in.copy(), geometry=df_in.geometry.name)
        if self.combined:
            gs = GeoSeries(df.geometry.unary_union.centroid,
                           name=df_in.geometry.name)
            return GeoDataFrame(gs)
        else:
            df[df.geometry.name] = df.geometry.centroid
            return df

    def calc_postgis(self):
        """
        Calculate the centroid using PostGIS

        :return: centroid as a GeoDataFrame
        """
        pg_io = self.inputs[0]
        io_query, params = pg_io.get_query()
        geom0, epsg = pg_io.geom_column, pg_io.epsg
        if self.combined:
            query = 'SELECT ST_Centroid(ST_Union({geom})) as {geom}' \
                    ' from ({query}) as foo'.format(
                        geom=geom0, query=io_query.rstrip(';'))
        else:
            query = re.sub('"{}"'.format(geom0),
                           'ST_Centroid("{geom}") as {geom}'.format(
                               geom=geom0), io_query, 1)
        return df_from_postgis(pg_io.engine, query, params, geom0, epsg)

    def compute(self):
        """
        Run the centroid process
        """
        use_postgis = self.inputs[0].__class__.__name__ == 'PostgisIO'
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
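# A usage sketch for CentroidProcess (hypothetical path): with
# combined=True a single centroid is returned for the union of all
# features; otherwise one centroid per feature:
#
#   districts = VectorFileIO(uri='/data/districts.geojson')
#   process = CentroidProcess(inputs=[districts], combined=True)
#   process.compute()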
class BufferProcess(GaiaProcess):
    """
    Generates a buffer polygon around the geometries of the input data.
    The size of the buffer is determined by the 'buffer_size' args key
    and the unit of measure should be meters.  If inputs are not in a
    metric projection they will be reprojected to EPSG:3857.
    """

    #: List of required inputs; name, type, max # of each; None = no max
    required_inputs = [
        {'description': 'Feature dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Required arguments, data types as dict
    required_args = [
        {
            'name': 'buffer_size',
            'title': 'Buffer Size',
            'description': 'Size of the buffer in meters',
            'type': float
        }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, inputs=None, buffer_size=None, **kwargs):
        self.buffer_size = buffer_size
        super(BufferProcess, self).__init__(inputs, **kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate the buffer using pandas GeoDataFrames

        :return: Buffer as a pandas GeoDataFrame
        """
        featureio = self.inputs[0]
        original_projection = featureio.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        feature_df = featureio.read(epsg=epsg)
        buffer = GeoSeries(feature_df.buffer(self.buffer_size).unary_union)
        buffer_df = GeoDataFrame(geometry=buffer)
        buffer_df.crs = feature_df.crs
        if original_projection:
            buffer_df[buffer_df.geometry.name] = buffer_df.geometry.to_crs(
                epsg=original_projection)
            buffer_df.crs = fiona.crs.from_epsg(original_projection)
        return buffer_df

    def calc_postgis(self):
        """
        Calculate the buffer using PostGIS

        :return: Buffer as a pandas GeoDataFrame
        """
        pg_io = self.inputs[0]
        original_projection = pg_io.epsg
        io_query, params = pg_io.get_query()
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        # Default to the native geometry column; only wrap it in
        # ST_Transform when the dataset's units are not metric.
        geom_query = pg_io.geom_column
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            geom_query = 'ST_Transform({}, {})'.format(geom_query, 3857)
        else:
            original_projection = None
        buffer_query = 'ST_Union(ST_Buffer({}, %s))'.format(geom_query)
        if original_projection:
            buffer_query = 'ST_Transform({}, {})'.format(buffer_query,
                                                         original_projection)
        query = 'SELECT {buffer} as {geocol} ' \
                'FROM ({query}) as foo'.format(buffer=buffer_query,
                                               geocol=pg_io.geom_column,
                                               query=io_query.rstrip(';'))
        params.insert(0, self.buffer_size)
        logger.debug(query)
        return df_from_postgis(pg_io.engine, query, params,
                               pg_io.geom_column, pg_io.epsg)

    def compute(self):
        """
        Run the buffer process.
        """
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
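# A usage sketch for BufferProcess (hypothetical path): buffer_size is in
# meters, and the result is the union of all per-feature buffers as a
# single geometry:
#
#   roads = VectorFileIO(uri='/data/roads.geojson')
#   process = BufferProcess(inputs=[roads], buffer_size=1000.0)
#   process.compute()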
class UnionProcess(GaiaProcess):
    """
    Combines two vector datasets into one.
    The datasets should have the same columns.
    """

    #: List of required inputs; name, type, max # of each; None = no max
    required_inputs = [
        {'description': 'First dataset',
         'type': types.VECTOR,
         'max': 1
         },
        {'description': 'Second dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(UnionProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate the union using pandas GeoDataFrames

        :return: union result as a GeoDataFrame
        """
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        if ''.join(first_df.columns) != ''.join(second_df.columns):
            raise GaiaException('Inputs must have the same columns')
        uniondf = GeoDataFrame(pd.concat([first_df, second_df]))
        return uniondf

    def calc_postgis(self):
        """
        Calculate the union using PostGIS

        :return: union result as a GeoDataFrame
        """
        union_queries = []
        union_params = []
        first = self.inputs[0]
        second = self.inputs[1]
        geom0, epsg = first.geom_column, first.epsg
        geom1, epsg1 = second.geom_column, second.epsg
        if ''.join(first.columns) != ''.join(second.columns):
            raise GaiaException('Inputs must have the same columns')
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            union_queries.append(io_query.rstrip(';'))
            union_params.extend(params)
        if epsg1 != epsg:
            geom1_query = 'ST_Transform({},{})'.format(geom1, epsg)
            union_queries[1] = union_queries[1].replace(
                '"{}"'.format(geom1), geom1_query)
        query = '({query0}) UNION ({query1})'\
            .format(query0=union_queries[0], query1=union_queries[1])
        return df_from_postgis(first.engine, query, union_params, geom0, epsg)

    def compute(self):
        """
        Run the union process.
        """
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
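# A usage sketch for UnionProcess (hypothetical paths): both inputs must
# share the same columns or a GaiaException is raised:
#
#   part1 = VectorFileIO(uri='/data/districts_north.geojson')
#   part2 = VectorFileIO(uri='/data/districts_south.geojson')
#   process = UnionProcess(inputs=[part1, part2])
#   process.compute()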
class WithinProcess(GaiaProcess):
    """
    Similar to SubsetProcess but for vectors: calculates the features
    within a vector dataset that are within (or whose centroids are
    within) the polygons of a second vector dataset.
    """

    #: List of required inputs; name, type, max # of each; None = no max
    required_inputs = [
        {'description': 'Feature dataset',
         'type': types.VECTOR,
         'max': 1
         },
        {'description': 'Within dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(WithinProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate the within process using pandas GeoDataFrames

        :return: within result as a GeoDataFrame
        """
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        first_within = first_df[first_df.geometry.within(
            second_df.geometry.unary_union)]
        return first_within

    def calc_postgis(self):
        """
        Calculate the within process using PostGIS

        :return: within result as a GeoDataFrame
        """
        first = self.inputs[0]
        within_queries = []
        within_params = []
        geom0 = first.geom_column
        epsg = first.epsg
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            within_queries.append(io_query.rstrip(';'))
            within_params.extend(params)
        joinstr = ' AND ' if 'WHERE' in within_queries[0].upper() else ' WHERE '
        query = '{query0} {join} ST_Within(ST_Transform({geom0},{epsg}), ' \
                '(SELECT ST_Union(ST_TRANSFORM({geom1},{epsg})) ' \
                'from ({query1}) as q2))'\
            .format(query0=within_queries[0], join=joinstr, geom0=geom0,
                    geom1=geom1, epsg=epsg, query1=within_queries[1])
        # Pass the parameters accumulated from both queries, not just
        # those of the last loop iteration.
        return df_from_postgis(first.engine, query, within_params,
                               geom0, epsg)

    def compute(self):
        """
        Run the Within process
        """
        if len(self.inputs) != 2:
            raise GaiaException('WithinProcess requires 2 inputs')
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
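# A usage sketch for WithinProcess (hypothetical paths), matching the
# test_within case above:
#
#   points = VectorFileIO(uri='/data/hospitals.geojson')
#   polygons = VectorFileIO(uri='/data/districts.geojson')
#   process = WithinProcess(inputs=[points, polygons])
#   process.compute()  # features of the first input inside the second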
class EqualsProcess(GaiaProcess):
    """
    Calculates the features within the first vector dataset that are the
    same as the features of the second vector dataset.
    """

    #: List of required inputs; name, type, max # of each; None = no max
    required_inputs = [
        {'description': 'First dataset',
         'type': types.VECTOR,
         'max': 1
         },
        {'description': 'Second dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(EqualsProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate which features are equal using pandas

        :return: result as a GeoDataFrame
        """
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        first_gs = first_df.geometry
        first_length = len(first_gs)
        second_gs = second_df.geometry
        matches = np.empty(first_length)
        for i, first_features in enumerate(first_gs):
            matched = [first_features.equals(second_features)
                       for second_features in second_gs]
            matches[i] = True in matched
        output_df = GeoDataFrame.copy(first_df)
        output_df['equals'] = matches
        output_df = output_df[
            (output_df['equals'] == 1)].drop('equals', 1)
        return output_df

    def calc_postgis(self):
        """
        Calculate which features are equal using PostGIS

        :return: result as a GeoDataFrame
        """
        equals_queries = []
        equals_params = []
        first = self.inputs[0]
        geom0, epsg = first.geom_column, first.epsg
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            equals_queries.append(io_query.rstrip(';'))
            equals_params.extend(params)
        joinstr = ' AND ' if 'WHERE' in equals_queries[0].upper() else ' WHERE '
        query = '{query0} {join} {geom0} IN (SELECT {geom1} ' \
                'FROM ({query1}) as second)'.format(query0=equals_queries[0],
                                                    query1=equals_queries[1],
                                                    join=joinstr,
                                                    geom0=geom0,
                                                    geom1=geom1)
        logger.debug(query)
        return df_from_postgis(first.engine, query,
                               equals_params, geom0, epsg)

    def compute(self):
        """
        Run the process
        """
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
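# A usage sketch for EqualsProcess (hypothetical paths and use case):
# keeps the features of the first dataset whose geometry exactly equals
# some feature of the second:
#
#   current = VectorFileIO(uri='/data/parcels_2016.geojson')
#   previous = VectorFileIO(uri='/data/parcels_2015.geojson')
#   process = EqualsProcess(inputs=[current, previous])
#   process.compute()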
class LeastCostProcess(GaiaProcess):
    """
    Process to calculate the least cost path between two points over
    a raster grid.
    """

    #: List of required inputs; name, type, max # of each; None = no max
    required_inputs = [{
        'description': 'Raster dataset',
        'type': types.RASTER,
        'max': 1
    }, {
        'description': 'Start/end point dataset(s)',
        'type': types.VECTOR,
        'max': 2
    }]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        """
        Create an instance of LeastCostProcess class.

        :param kwargs:
        """
        super(LeastCostProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        self.validate()

    def array2shp(self, array, outSHPfn, rasterfn, pixelValue):
        """
        Convert a grid array representation of the path into a shapefile

        :param array: least cost path as numeric grid array
        :param outSHPfn: output shapefile
        :param rasterfn: raster file used to calculate path
        :param pixelValue: cell value of path in grid array
        """
        raster = get_dataset(rasterfn.uri)
        geotransform = raster.GetGeoTransform()
        pixelWidth = geotransform[1]
        maxDistance = ceil(sqrt(2 * pixelWidth * pixelWidth))
        count = 0
        roadList = np.where(array == pixelValue)
        pointDict = {}
        for indexY in roadList[0]:
            indexX = roadList[1][count]
            Xcoord, Ycoord = self.pixel_offset2coord(rasterfn,
                                                     indexX, indexY)
            pointDict[count] = (Xcoord, Ycoord)
            count += 1
        multiline = ogr.Geometry(ogr.wkbMultiLineString)
        for i in itertools.combinations(pointDict.values(), 2):
            point1 = ogr.Geometry(ogr.wkbPoint)
            point1.AddPoint(i[0][0], i[0][1])
            point2 = ogr.Geometry(ogr.wkbPoint)
            point2.AddPoint(i[1][0], i[1][1])
            # calculate the distance between two points
            distance = point1.Distance(point2)
            if distance < maxDistance:
                line = ogr.Geometry(ogr.wkbLineString)
                line.AddPoint(i[0][0], i[0][1])
                line.AddPoint(i[1][0], i[1][1])
                multiline.AddGeometry(line)
        shpDriver = ogr.GetDriverByName("GeoJSON")
        if os.path.exists(outSHPfn):
            shpDriver.DeleteDataSource(outSHPfn)
        else:
            self.output.create_output_dir(outSHPfn)
        outDataSource = shpDriver.CreateDataSource(outSHPfn)
        outLayer = outDataSource.CreateLayer(
            outSHPfn, geom_type=ogr.wkbMultiLineString)
        featureDefn = outLayer.GetLayerDefn()
        outFeature = ogr.Feature(featureDefn)
        outFeature.SetGeometry(multiline)
        outLayer.CreateFeature(outFeature)

    def raster_to_array(self, rasterfn):
        """
        Convert a raster grid into an array

        :param rasterfn: input raster file
        :return: array
        """
        raster = get_dataset(rasterfn.uri)
        band = raster.GetRasterBand(1)
        array = band.ReadAsArray()
        return array

    def coord2pixeloffset(self, rasterfn, x, y):
        """
        Convert lat/long coordinates to pixel coordinates

        :param rasterfn: raster file
        :param x: longitude
        :param y: latitude
        :return: pixel offsets
        """
        raster = get_dataset(rasterfn.uri)
        geotransform = raster.GetGeoTransform()
        originX = geotransform[0]
        originY = geotransform[3]
        pixelWidth = geotransform[1]
        pixelHeight = geotransform[5]
        xOffset = int((x - originX) / pixelWidth)
        yOffset = int((y - originY) / pixelHeight)
        return xOffset, yOffset

    def pixel_offset2coord(self, rasterfn, xOffset, yOffset):
        """
        Convert pixel coordinates to lat/long coordinates

        :param rasterfn: raster dataset
        :param xOffset: longitude offset
        :param yOffset: latitude offset
        :return: coordinates
        """
        raster = get_dataset(rasterfn.uri)
        geotransform = raster.GetGeoTransform()
        originX = geotransform[0]
        originY = geotransform[3]
        pixelWidth = geotransform[1]
        pixelHeight = geotransform[5]
        coordX = originX + pixelWidth * xOffset
        coordY = originY + pixelHeight * yOffset
        return coordX, coordY

    def create_path(self, raster, costSurfaceArray, start, end):
        """
        Calculate the least cost path

        :param raster: Raster file
        :param costSurfaceArray: raster file as numeric array
        :param start: start point
        :param end: end point
        :return: least cost path as grid array
        """
        # coordinates to array index
        startCoordX = start.x
        startCoordY = start.y
        startIndexX, startIndexY = self.coord2pixeloffset(
            raster, startCoordX, startCoordY)
        stopCoordX = end.x
        stopCoordY = end.y
        stopIndexX, stopIndexY = self.coord2pixeloffset(
            raster, stopCoordX, stopCoordY)
        # create path
        indices, weight = route_through_array(
            costSurfaceArray,
            (startIndexY, startIndexX),
            (stopIndexY, stopIndexX),
            geometric=True, fully_connected=True)
        indices = np.array(indices).T
        path = np.zeros_like(costSurfaceArray)
        path[indices[0], indices[1]] = 1
        return path

    def calculate_path(self, raster, start, end):
        """
        Convert the input raster into an array and calculate the least
        cost path as an array

        :param raster: raster file
        :param start: start point
        :param end: end point
        """
        costSurfaceArray = self.raster_to_array(raster)
        pathArray = self.create_path(raster, costSurfaceArray, start, end)
        self.array2shp(pathArray, self.output.uri, raster, 1)

    def compute(self):
        """
        Perform the process calculations
        """
        if len(self.inputs) == 3:
            # two vector inputs: one start point, one end point
            start_point = self.inputs[1].read().iloc[0].geometry.centroid
            end_point = self.inputs[2].read().iloc[0].geometry.centroid
        else:
            # one vector input containing both start and end points
            start_point = self.inputs[1].read().iloc[0].geometry.centroid
            end_point = self.inputs[1].read().iloc[1].geometry.centroid
        self.calculate_path(self.inputs[0], start_point, end_point)
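# A usage sketch for LeastCostProcess (hypothetical paths): the raster is
# the cost surface; start and end points come from one vector input with
# two features, or from two single-feature vector inputs:
#
#   cost = RasterFileIO(uri='/data/cost_surface.tif')
#   endpoints = VectorFileIO(uri='/data/start_end_points.geojson')
#   process = LeastCostProcess(inputs=[cost, endpoints])
#   process.compute()  # writes the path as GeoJSON to process.output.uri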
class CrossesProcess(GaiaProcess):
    """
    Calculates the features within the first vector dataset that cross
    the combined features of the second vector dataset.
    """

    #: List of required inputs; name, type, max # of each; None = no max
    required_inputs = [
        {'description': 'Feature dataset',
         'type': types.VECTOR,
         'max': 1
         },
        {'description': 'Crosses dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(CrossesProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate the process using pandas

        :return: result as a GeoDataFrame
        """
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        first_intersects = first_df[first_df.geometry.crosses(
            second_df.geometry.unary_union)]
        return first_intersects

    def calc_postgis(self):
        """
        Calculate the process using PostGIS

        :return: result as a GeoDataFrame
        """
        cross_queries = []
        cross_params = []
        first = self.inputs[0]
        geom0, epsg = first.geom_column, first.epsg
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            cross_queries.append(io_query.rstrip(';'))
            cross_params.extend(params)
        joinstr = ' AND ' if 'WHERE' in cross_queries[0].upper() else ' WHERE '
        query = '{query0} {join} (SELECT ST_Crosses(ST_Transform(' \
                '{table}.{geom0},{epsg}), ST_Union(ST_Transform(' \
                'q2.{geom1},{epsg}))) from ({query1}) as q2)'\
            .format(query0=cross_queries[0], join=joinstr, geom0=geom0,
                    geom1=geom1, epsg=epsg, query1=cross_queries[1],
                    table=first.table)
        return df_from_postgis(first.engine, query, cross_params, geom0, epsg)

    def compute(self):
        """
        Run the crosses process
        """
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
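# A usage sketch for CrossesProcess (hypothetical paths): returns the
# features of the first dataset that cross the combined second dataset:
#
#   roads = VectorFileIO(uri='/data/roads.geojson')
#   rivers = VectorFileIO(uri='/data/rivers.geojson')
#   process = CrossesProcess(inputs=[roads, rivers])
#   process.compute()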
def main(args):
    parser = argparse.ArgumentParser(
        description="Write a destination file with all features from "
                    "source file overlapping specified polygon")
    parser.add_argument(
        "source",
        help="Source vector file name that contains all features")
    parser.add_argument(
        "-p", "--polygon", nargs="+",
        help="xmin ymin xmax ymax or vector file specifying the polygon")
    parser.add_argument(
        "destination",
        help="Destination vector file with only features "
             "overlapping the specified polygon")
    args = parser.parse_args(args)

    if len(args.polygon) == 4:
        xmin, ymin, xmax, ymax = [float(val) for val in args.polygon]
        # The ring repeats its first coordinate at the end, as the
        # GeoJSON spec requires for closed polygon rings.
        polygon = FeatureIO(features=[
            {
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [[[xmin, ymin], [xmax, ymin],
                                     [xmax, ymax], [xmin, ymax],
                                     [xmin, ymin]]]
                },
                "properties": {
                    "id": "Bounding box"
                }
            },
        ])
    elif len(args.polygon) == 1:
        polygonImage = gdal.Open(args.polygon[0], gdal.GA_ReadOnly)
        if polygonImage:
            # A raster file: use its reprojected extent as the polygon.
            gt = polygonImage.GetGeoTransform()
            cols = polygonImage.RasterXSize
            rows = polygonImage.RasterYSize
            ext = GetExtent(gt, cols, rows)
            src_srs = osr.SpatialReference()
            src_srs.ImportFromWkt(polygonImage.GetProjection())
            tgt_srs = src_srs.CloneGeogCS()
            p = ReprojectCoords(ext, src_srs, tgt_srs)
            polygon = FeatureIO(features=[
                {
                    "geometry": {
                        "type": "Polygon",
                        "coordinates": [[[p[0][0], p[0][1]],
                                         [p[1][0], p[1][1]],
                                         [p[2][0], p[2][1]],
                                         [p[3][0], p[3][1]],
                                         [p[0][0], p[0][1]]]]
                    },
                    "properties": {
                        "id": "Bounding box"
                    }
                },
            ])
        else:
            # Not a readable raster: treat the argument as a vector file.
            polygon = VectorFileIO(uri=args.polygon[0])
    else:
        raise RuntimeError(
            "Error: wrong number of parameters for polygon: {} "
            "(can be 4 or 1)".format(len(args.polygon)))

    source = VectorFileIO(uri=args.source)
    destination = VectorFileIO(uri=args.destination)
    intersectProcess = IntersectsProcess(
        inputs=[source, polygon], output=destination)
    intersectProcess.compute()
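# Example invocations of this script (the script name and file names are
# hypothetical; positionals are given before -p so that the greedy
# nargs="+" option cannot swallow the destination argument):
#
#   python clip_by_polygon.py roads.geojson out.geojson \
#       -p 44.2 33.2 44.5 33.4
#   python clip_by_polygon.py roads.geojson out.geojson -p mask.tif
#
# The first clips by a bounding box, the second by a raster's extent; a
# vector file is used as the polygon source if gdal.Open fails.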