Example #1
def init_sc():
    global sc
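    # Assumes the enclosing module defines a module-level `sc = None` and
    # imports `geopyspark as gps` and `SparkContext` from pyspark;
    # otherwise the `if not sc` check below raises a NameError.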
    if not sc:
        conf = gps.geopyspark_conf(appName="geopyspark-example",
                                   master="local[*]")
        conf.set(key='spark.ui.enabled', value='true')
        sc = SparkContext(conf=conf)
Example #2
class BaseTestClass(unittest.TestCase):
    if 'TRAVIS' in os.environ:
        master_str = "local[2]"
    else:
        master_str = "local[*]"

    conf = geopyspark_conf(master=master_str, appName="test")
    conf.set('spark.kryoserializer.buffer.max', value='1G')
    conf.set('spark.ui.enabled', True)

    if 'TRAVIS' in os.environ:
        conf.set(key='spark.driver.memory', value='1G')
        conf.set(key='spark.executor.memory', value='1G')

    pysc = SparkContext(conf=conf)

    dir_path = file_path("all-ones.tif")

    rdd = get(LayerType.SPATIAL, dir_path, max_tile_size=1024)
    value = rdd.to_numpy_rdd().collect()[0]

    projected_extent = value[0]
    extent = projected_extent.extent

    expected_tile = value[1].cells
    (_, rows, cols) = expected_tile.shape

    layout = TileLayout(1, 1, cols, rows)
Example #3
def rdd_tms_server(catalog_path, resource_name):
    conf = gps.geopyspark_conf(master="local[*]", appName="master")
    pysc = SparkContext(conf=conf)

    print("Tiles Catalog Path")
    catalog_path = "file://" + catalog_path
    data_name = resource_name

    print("Color Setting")
    color_dict = {}
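    # `random_color()` is assumed to be a helper defined elsewhere in this
    # module that returns a hex color string parseable with int(..., 16).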
    for i in range(120000):
        color_dict[i] = int(random_color(), 16)
    cm = gps.ColorMap.build(color_dict)

    print('TMS Setting')
    tms = gps.TMS.build(source=(catalog_path, data_name), display=cm)

    print('Set up TMS server')
    tms.bind(host="0.0.0.0", requested_port=8085)
    print(tms.url_pattern)

    time.sleep(365 * 24 * 60 * 60)

    print('Shutdown TMS server')
    tms.unbind()
Example #4
def _setup_local_spark(print: Callable = print,
                       verbosity=0,
                       master_url="local[2]",
                       app_name="openEO-GeoPySpark-Driver",
                       additional_jar_dirs=[]):
    print("Setting up local Spark")

    if 'PYSPARK_PYTHON' not in os.environ:
        os.environ['PYSPARK_PYTHON'] = sys.executable

    _ensure_geopyspark(print=print)
    from geopyspark import geopyspark_conf
    from pyspark import SparkContext

    conf = geopyspark_conf(master=master_url,
                           appName=app_name,
                           additional_jar_dirs=additional_jar_dirs)
    conf.set('spark.kryoserializer.buffer.max', value='1G')
    # Only show spark progress bars for high verbosity levels
    conf.set('spark.ui.showConsoleProgress', verbosity >= 3)
    conf.set('spark.ui.enabled', True)
    # TODO: allow finetuning the config more?

    print("SparkContext.getOrCreate with {c!r}".format(c=conf.getAll()))
    context = SparkContext.getOrCreate(conf)

    return context
Example #5
def setup_local_spark():
    from pyspark import find_spark_home, SparkContext

    spark_python = os.path.join(find_spark_home._find_spark_home(), 'python')
    py4j = glob(os.path.join(spark_python, 'lib', 'py4j-*.zip'))[0]
    sys.path[:0] = [spark_python, py4j]
    _log.debug('sys.path: {p!r}'.format(p=sys.path))
    if 'TRAVIS' in os.environ:
        master_str = "local[2]"
    else:
        master_str = "local[*]"

    from geopyspark import geopyspark_conf
    conf = geopyspark_conf(master=master_str, appName="openeo-geotrellis-local")
    conf.set('spark.kryoserializer.buffer.max', value='1G')
    conf.set('spark.ui.enabled', True)
    # Some options to allow attaching a Java debugger to running Spark driver
    conf.set('spark.driver.extraJavaOptions', '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5009')

    if 'TRAVIS' in os.environ:
        conf.set(key='spark.driver.memory', value='2G')
        conf.set(key='spark.executor.memory', value='2G')

    if 'PYSPARK_PYTHON' not in os.environ:
        os.environ['PYSPARK_PYTHON'] = sys.executable

    _log.info('Creating Spark context with config:')
    for k, v in conf.getAll():
        _log.info("Spark config: {k!r}: {v!r}".format(k=k, v=v))
    pysc = SparkContext.getOrCreate(conf)
    _log.info('Created Spark Context {s}'.format(s=pysc))
    _log.info('Spark web UI: http://localhost:{p}/'.format(p=pysc.getConf().get('spark.ui.port') or 4040))

    return pysc
Example #6
def test():
    conf = gps.geopyspark_conf(master="local[*]", appName="master")
    pysc = SparkContext(conf=conf)

    layer_metadata = gps.read_layer_metadata(
        uri="file:///usr/local/large_scale_hydro/catalog",
        layer_name="demo-dem",
        layer_zoom=0)
    layer_extent = layer_metadata.extent
    print(layer_extent)

    poly = None
    # Creates a Polygon from Geojson
    json_path = '/usr/local/large_scale_hydro/result/polygon.geojson'
    with open(json_path) as f:
        js = json.load(f)
        features = js['features']
        if features[0]['geometry']['type'] == 'Polygon':
            polygon = features[0]['geometry']['coordinates']
            points = polygon[0]
            input_array = []
            for point in points:
                input_array.append(tuple(point))
            poly = Polygon(input_array)

    tiled_raster_layer = gps.query(
        uri="file:///usr/local/large_scale_hydro/catalog",
        layer_name="demo-dem",
        layer_zoom=0,
        query_geom=poly)
    print(tiled_raster_layer.count())
    print(tiled_raster_layer.layer_metadata.extent)
    tiled_raster_layer.save_stitched(
        '/usr/local/large_scale_hydro/result/result_geojson.tif')
Example #7
def data_search(catalog_path, json_path, dem_tif_path, dir_tif_path,
                acc_tif_path):
    catalog_path = "file://" + catalog_path
    conf = gps.geopyspark_conf(master="local[*]", appName="master")
    pysc = SparkContext(conf=conf)

    polys = None
    # Creates a MultiPolygon from Geojson
    with open(json_path) as f:
        js = json.load(f)
        FeatureObj = None
        if js['type'] == 'FeatureCollection':
            FeatureObj = js['features'][0]
        elif js['type'] == 'Feature':
            FeatureObj = js
        if FeatureObj['geometry']['type'] == 'MultiPolygon':
            polygons = FeatureObj['geometry']['coordinates']
            polygons_array = []
            for polygon in polygons:
                input_array = []
                for point in polygon[0]:
                    input_array.append(tuple(point))
                polygons_array.append(Polygon(input_array))
            polys = MultiPolygon(polygons_array)
        elif FeatureObj['geometry']['type'] == 'Polygon':
            polygon = FeatureObj['geometry']['coordinates']
            points = polygon[0]
            input_array = []
            for point in points:
                input_array.append(tuple(point))
            polys = Polygon(input_array)

    print("Get DEM")
    tiled_raster_layer = gps.query(uri=catalog_path,
                                   layer_name="dem",
                                   layer_zoom=0,
                                   query_geom=polys)
    print(tiled_raster_layer.count())
    print(tiled_raster_layer.layer_metadata.extent)
    tiled_raster_layer.save_stitched(dem_tif_path)

    print("Get Direction")
    tiled_raster_layer = gps.query(uri=catalog_path,
                                   layer_name="direction",
                                   layer_zoom=0,
                                   query_geom=polys)
    # tiled_raster_layer = gps.query(uri=catalog_path, layer_name="dir", layer_zoom=0, query_geom=polys)
    print(tiled_raster_layer.count())
    print(tiled_raster_layer.layer_metadata.extent)
    tiled_raster_layer.save_stitched(dir_tif_path)

    print("Get Accumulation")
    tiled_raster_layer = gps.query(uri=catalog_path,
                                   layer_name="accumulation",
                                   layer_zoom=0,
                                   query_geom=polys)
    # tiled_raster_layer = gps.query(uri=catalog_path, layer_name="acc", layer_zoom=0, query_geom=polys)
    print(tiled_raster_layer.count())
    print(tiled_raster_layer.layer_metadata.extent)
    tiled_raster_layer.save_stitched(acc_tif_path)
Example #8
    def __init__(self):
        master_str = "local[*]"

        conf = geopyspark_conf(master=master_str, appName="test")
        conf.set('spark.kryoserializer.buffer.max', value='1G')
        conf.set('spark.ui.enabled', True)

        if ConfigParams().is_ci_context:
            conf.set(key='spark.driver.memory', value='2G')
            conf.set(key='spark.executor.memory', value='2G')

        self.pysc = SparkContext.getOrCreate(conf)

        self.first = np.zeros((1, 4, 4))
        self.first.fill(1)

        self.second = np.zeros((1, 4, 4))
        self.second.fill(2)

        self.extent = {'xmin': 0.0, 'ymin': 0.0, 'xmax': 4.0, 'ymax': 4.0}
        self.layout = {
            'layoutCols': 1,
            'layoutRows': 1,
            'tileCols': 4,
            'tileRows': 4
        }

        self.now = datetime.datetime.strptime("2017-09-25T11:37:00Z",
                                              '%Y-%m-%dT%H:%M:%SZ')
Example #9
def _local_spark_conf() -> SparkConf:
    import geopyspark as gps
    conf = gps.geopyspark_conf(master="local[*]", appName="benchmark.py")
    conf.set('spark.yarn.keytab',
             "/home/bossie/Documents/VITO/vdboschj.keytab")
    conf.set('spark.yarn.principal', "vdboschj")

    return conf
Example #10
def _setup_local_spark(out: TerminalReporter, verbosity=0):
    # TODO make a "spark_context" fixture instead of doing this through pytest_configure
    out.write_line("[conftest.py] Setting up local Spark")

    travis_mode = 'TRAVIS' in os.environ
    master_str = "local[2]" if travis_mode else "local[2]"

    if 'PYSPARK_PYTHON' not in os.environ:
        os.environ['PYSPARK_PYTHON'] = sys.executable

    from geopyspark import geopyspark_conf
    from pyspark import SparkContext

    conf = geopyspark_conf(master=master_str,
                           appName="OpenEO-GeoPySpark-Driver-Tests")
    conf.set('spark.kryoserializer.buffer.max', value='1G')
    conf.set(key='spark.kryo.registrator',
             value='geopyspark.geotools.kryo.ExpandedKryoRegistrator')
    conf.set(
        key='spark.kryo.classesToRegister',
        value=
        'org.openeo.geotrellisaccumulo.SerializableConfiguration,ar.com.hjg.pngj.ImageInfo,ar.com.hjg.pngj.ImageLineInt,geotrellis.raster.RasterRegion$GridBoundsRasterRegion'
    )
    # Only show spark progress bars for high verbosity levels
    conf.set('spark.ui.showConsoleProgress', verbosity >= 3)

    if travis_mode:
        conf.set(key='spark.driver.memory', value='2G')
        conf.set(key='spark.executor.memory', value='2G')
        conf.set('spark.ui.enabled', False)
    else:
        conf.set('spark.ui.enabled', True)

    out.write_line("[conftest.py] SparkContext.getOrCreate with {c!r}".format(
        c=conf.getAll()))
    context = SparkContext.getOrCreate(conf)
    out.write_line("[conftest.py] JVM info: {d!r}".format(
        d={
            f: context._jvm.System.getProperty(f)
            for f in [
                "java.version",
                "java.vendor",
                "java.home",
                "java.class.version",
                # "java.class.path",
            ]
        }))

    out.write_line("[conftest.py] Validating the Spark context")
    dummy = context._jvm.org.openeo.geotrellis.OpenEOProcesses()
    answer = context.parallelize([9, 10, 11, 12]).sum()
    out.write_line("[conftest.py] " + repr((answer, dummy)))

    return context
Example #11
def _setup_local_spark(out: TerminalReporter, verbosity=0):
    # TODO make a "spark_context" fixture instead of doing this through pytest_configure
    out.write_line("Setting up local Spark")

    travis_mode = 'TRAVIS' in os.environ
    master_str = "local[2]" if travis_mode else "local[*]"

    from geopyspark import geopyspark_conf
    from pyspark import SparkContext

    conf = geopyspark_conf(master=master_str, appName="OpenEO-GeoPySpark-Driver-Tests")
    conf.set('spark.kryoserializer.buffer.max', value='1G')
    # Only show spark progress bars for high verbosity levels
    conf.set('spark.ui.showConsoleProgress', verbosity >= 3)

    if travis_mode:
        conf.set(key='spark.driver.memory', value='2G')
        conf.set(key='spark.executor.memory', value='2G')
        conf.set('spark.ui.enabled', False)
    else:
        conf.set('spark.ui.enabled', True)

    out.write_line("SparkContext.getOrCreate with {c!r}".format(c=conf.getAll()))
    context = SparkContext.getOrCreate(conf)
    out.write_line("JVM info: {d!r}".format(d={
        f: context._jvm.System.getProperty(f)
        for f in [
            "java.version", "java.vendor", "java.home",
            "java.class.version",
            # "java.class.path",
        ]
    }))

    out.write_line("Validating the Spark context")
    dummy = context._jvm.org.openeo.geotrellis.OpenEOProcesses()
    answer = context.parallelize([9, 10, 11, 12]).sum()
    out.write_line(repr((answer, dummy)))

    return context
Example #12
def main(data_path, area_of_interest, outfile):
    # Create the SparkContext
    conf = gps.geopyspark_conf(appName="geodatafw", master="local[*]")
    sc = SparkContext(conf=conf)

    # Create raster_layer object from Sentinel 2 data
    raster_layer = get_raster_layer(sc, data_path)

    # Tile the rasters within the layer and reproject them to Web Mercator.
    tiled_layer = raster_layer.tile_to_layout(layout=gps.GlobalLayout(),
                                              target_crs=3857)

    # Mask the tiles within the layer with the area of interest
    masked = tiled_layer.mask(geometries=area_of_interest)

    # Pyramiding the masked TiledRasterLayer (for use in a TMS server later)
    # is disabled here; the masked layer is written to the catalog directly.
    # pyramided_mask = masked.pyramid()
    # for pyramid in pyramided_mask.levels.values():

    # Save the masked layer locally so that it can be accessed at a later time.
    gps.write(uri='file://%s' % outfile,
              layer_name='munsmo',
              tiled_raster_layer=masked)
Example #13
def test(work_path):
    process_path = work_path + "/process"
    if not os.path.exists(process_path):
        os.makedirs(process_path)

    conf = gps.geopyspark_conf(master="local[*]", appName="master")
    pysc = SparkContext(conf=conf)

    polys = None
    # Creates a Polygon from Geojson
    json_path = '/usr/local/large_scale_hydro/result/polygon.geojson'
    with open(json_path) as f:
        js = json.load(f)
        FeatureObj = None
        if js['type'] == 'FeatureCollection':
            FeatureObj = js['features'][0]
        elif js['type'] == 'Feature':
            FeatureObj = js
        if FeatureObj['geometry']['type'] == 'MultiPolygon':
            polygons = FeatureObj['geometry']['coordinates']
            polygons_array = []
            for polygon in polygons:
                input_array = []
                for point in polygon[0]:
                    input_array.append(tuple(point))
                polygons_array.append(Polygon(input_array))
            polys = MultiPolygon(polygons_array)
        elif FeatureObj['geometry']['type'] == 'Polygon':
            polygon = FeatureObj['geometry']['coordinates']
            points = polygon[0]
            input_array = []
            for point in points:
                input_array.append(tuple(point))
            polys = Polygon(input_array)

    dem_tif_path = process_path + '/dem.tif'
    print("Get DEM")
    tiled_raster_layer = gps.query(
        uri="file:///usr/local/large_scale_hydro/catalog",
        layer_name="dem",
        layer_zoom=0,
        query_geom=polys)
    print(tiled_raster_layer.count())
    print(tiled_raster_layer.layer_metadata.extent)
    tiled_raster_layer.save_stitched(dem_tif_path)

    dir_tif_path = process_path + '/dir.tif'
    print("Get Direction")
    tiled_raster_layer = gps.query(
        uri="file:///usr/local/large_scale_hydro/catalog",
        layer_name="direction",
        layer_zoom=0,
        query_geom=polys)
    print(tiled_raster_layer.count())
    print(tiled_raster_layer.layer_metadata.extent)
    tiled_raster_layer.save_stitched(dir_tif_path)

    acc_tif_path = process_path + '/acc.tif'
    print("Get Accumulation")
    tiled_raster_layer = gps.query(
        uri="file:///usr/local/large_scale_hydro/catalog",
        layer_name="accumulation",
        layer_zoom=0,
        query_geom=polys)
    print(tiled_raster_layer.count())
    print(tiled_raster_layer.layer_metadata.extent)
    tiled_raster_layer.save_stitched(acc_tif_path)

    lake_tif_path = process_path + '/lakes.tif'
    print("Get Lakes")
    tiled_raster_layer = gps.query(
        uri="file:///usr/local/large_scale_hydro/catalog",
        layer_name="lakes",
        layer_zoom=0,
        query_geom=polys)
    print(tiled_raster_layer.count())
    print(tiled_raster_layer.layer_metadata.extent)
    tiled_raster_layer.save_stitched(lake_tif_path)
Example #14
import geopyspark as gps

from pyspark import SparkContext
from shapely.geometry import box


# Create the SparkContext
conf = gps.geopyspark_conf(appName="geopyspark-example", master="local[*]")
sc = SparkContext(conf=conf)

# Read in the NLCD tif that has been saved locally.
# This tif represents the state of Pennsylvania.
raster_layer = gps.geotiff.get(layer_type=gps.LayerType.SPATIAL,
                               uri='/tmp/NLCD2011_LC_Pennsylvania.tif',
                               num_partitions=100)

# Tile the rasters within the layer and reproject them to Web Mercator.
tiled_layer = raster_layer.tile_to_layout(layout=gps.GlobalLayout(), target_crs=3857)

# Creates a Polygon that covers roughly the north-west section of Philadelphia.
# This is the region that will be masked.
area_of_interest = box(-75.229225, 40.003686, -75.107345, 40.084375)

# Mask the tiles within the layer with the area of interest
masked = tiled_layer.mask(geometries=area_of_interest)

# We will now pyramid the masked TiledRasterLayer so that we can use it in a TMS server later.
pyramided_mask = masked.pyramid()

# Save each layer of the pyramid locally so that it can be accessed at a later time.
for pyramid in pyramided_mask.levels.values():
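    # The snippet is truncated here; a plausible loop body, mirroring the
    # catalog write in Example #12 (the URI and layer name below are
    # illustrative placeholders, not taken from the original source):
    gps.write(uri='file:///tmp/masked-nlcd-catalog',
              layer_name='north-west-philly',
              tiled_raster_layer=pyramid)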
Example #15
        new_line.pop('uri')

    return new_line


def make_tiles(line):
    projected_extent = line[0]
    bands = sorted(line[1], key=lambda l: l['band'])
    array = np.array([l['data'] for l in bands])
    tile = gps.Tile.from_numpy_array(array, no_data_value=0)
    return (projected_extent, tile)


if __name__ == "__main__":

    sc = SparkContext(conf=gps.geopyspark_conf(
        appName="Landsat").set("spark.ui.enabled", True))
    csv_data = [{
        'uri':
        's3://landsat-pds/L8/107/035/LC81070352015218LGN00/LC81070352015218LGN00_B9.TIF',
        'scene_id': 'LC81070352015218LGN00',
        'date': '2015218',
        'band': '9'
    }, {
        'uri':
        's3://landsat-pds/L8/107/035/LC81070352015218LGN00/LC81070352015218LGN00_B9.TIF',
        'scene_id': 'LC81070352015218LGN00',
        'date': '2015218',
        'band': '9'
    }, {
        'uri':
        's3://landsat-pds/L8/107/035/LC81070352015218LGN00/LC81070352015218LGN00_B9.TIF',
Example #16
def overlay_geopyspark_conf(appName='ClimateOverlay', memory='12G'):
    conf = gps.geopyspark_conf(appName=appName)
    # 'spark.master.memory' is not a standard Spark property; the intent was
    # presumably to size the driver, so 'spark.driver.memory' is used here.
    conf.set('spark.driver.memory', memory)
    conf.set('spark.ui.enabled', True)
    sc = SparkContext(conf=conf)
    return sc
Example #17
import logging
from threading import Thread

import requests
from flask import Flask, request
# for strange reasons, one has to import netCDF4 before geopyspark on some systems, otherwise reading nc-files fails.
import netCDF4
import geopyspark as gps
from pyspark import SparkContext

from cuizinart_pyspark import slice
from pyspark_settings import SPARK_MASTER, CUIZINART_URL, NC_OUTPUT_PATH, CUIZINART_PYSPARK_PASSWORD

logger = logging.getLogger('pyspark')
app = Flask('cuizinart_pyspark')

conf = gps.geopyspark_conf(appName='gwf', master=SPARK_MASTER)
sc = SparkContext(conf=conf)


@app.route('/process_query', methods=['POST'])
def process_query():
    request_json = request.get_json()
    request_id = request_json['request_id']
    user_email = request_json['user_email']
    product = request_json['product']
    geojson_shape = request_json['geojson_shape']
    start_time = request_json['start_time']
    end_time = request_json['end_time']
    request_vars = request_json['request_vars']
    horizons = request_json['horizons']
    issues = request_json['issues']
Example #18
import datetime
import geopyspark as gps
import numpy as np

from pyspark import SparkContext
from shapely.geometry import MultiPolygon, box

conf = gps.geopyspark_conf(master="local[*]", appName="layers")
pysc = SparkContext(conf=conf)

uri = "file:/data/workspace/geotrellis-landsat-tutorial/data/1k_tiles/"
layer_name = "landsat1K"

metadata = gps.read_layer_metadata(uri=uri,
                                   layer_name=layer_name,
                                   layer_zoom=0)
print(metadata)

# Get list of tiles
print(metadata.bounds)

# Read the first tile
tile = gps.read_value(uri=uri,
                      layer_name=layer_name,
                      layer_zoom=0,
                      col=metadata.bounds.minKey.col,
                      row=metadata.bounds.minKey.row)

print(tile)

# Read the layer
import geopyspark
import numpy

from pyspark.sql import SparkSession
from pyrasterframes import *
from pyrasterframes.rasterfunctions import *

conf = geopyspark.geopyspark_conf(appName="POC")
session = SparkSession.builder.config(
    conf=conf).getOrCreate().withRasterFrames()

uri = "file:/data/workspace/geotrellis-landsat-tutorial/data/1k_tiles/"
layer_name = "landsat1K"

layer = geopyspark.query(uri=uri, layer_name=layer_name, layer_zoom=0)

rf = layer.to_rasterframe(3)
rf.show(2)

# Show CRS
rf.tileLayerMetadata()['crs']

# Convert Tile data to array
rf.select(tileToDoubleArray("tile_1")).show(10, 80)

# Global aggregation statistics
rf.agg(aggNoDataCells("tile_1"), aggDataCells("tile_1"),
       aggMean("tile_1")).show(5, False)

# Tile aggregation statistics
rf.select(tileMean("tile_1"), tileMin("tile_1"), tileMax("tile_1")).show(5)