import time

import dask
import dask.bag as db
import numpy as np
import rasterio
from dask.distributed import Client
from pyspark import SparkConf, SparkContext
from rasterio.windows import Window

# build_collection_tile, callgrind and profile_record_on/off are project-local
# helpers (tiling in tile/tile.py, profiling utilities); their import paths are
# not shown in the original source.


def try_dask_filter2D(pathimage, kernel, output_pathimage, unit_height=500, unit_width=500):
    # Dask bag variant profiled with the callgrind context manager.
    client = Client()
    # client.upload_file("tile/tile.py")
    (collection, info) = build_collection_tile(pathimage, unit_height, unit_width)
    with callgrind(tag="Graph"):
        rdd = db.from_sequence(collection).map(lambda n: n.filter2D(kernel))
    with callgrind(tag="Compute"):
        collection2 = rdd.compute()
    with rasterio.open(output_pathimage, 'w', driver=info.driver,
                       width=info.width, height=info.height, count=info.count,
                       dtype=info.dtypes[0], transform=info.transform) as dst:
        # Write each filtered tile back into its window of the output raster.
        # Window(col_off, row_off, width, height): in this tiling, x indexes
        # rows and y indexes columns, hence Window(y0, x0, ...).
        for t in collection2:
            (x0, y0, x1, y1) = t.bounding_polygon.bounds
            (x0, y0, x1, y1) = (int(x0), int(y0), int(x1), int(y1))
            for i in info.indexes:
                dst.write(t.img[i - 1],
                          window=Window(y0, x0, y1 - y0, x1 - x0),
                          indexes=i)
    client.close()
def try_spark_filter2D(pathimage, kernel, output_pathimage):
    # Spark variant profiled with the profile_record_on/off helpers.
    (collection, info) = build_collection_tile(pathimage)
    sc = SparkContext()
    profile_record_on()
    rdd = sc.parallelize(collection)
    rdd = rdd.map(lambda n: n.filter2D(kernel))
    profile_record_off("profiling/rawdata/callgrind_graph.txt")
    profile_record_on()
    collection_res = rdd.collect()
    profile_record_off("profiling/rawdata/callgrind_compute.txt")
    sc.stop()
    # Reassemble the filtered tiles into a full image, then write it out.
    img = np.empty((info.count, info.height, info.width), dtype=info.dtypes[0])
    for tile in collection_res:
        (x0, y0, x1, y1) = tile.bounding_polygon.bounds
        (x0, y0, x1, y1) = (int(x0), int(y0), int(x1), int(y1))
        img[:, x0:x1, y0:y1] = tile.img
    with rasterio.open(output_pathimage, 'w', driver=info.driver,
                       width=info.width, height=info.height, count=info.count,
                       dtype=info.dtypes[0], crs=info.crs,
                       transform=info.transform) as res:
        for i in range(res.count):
            res.write(img[i], i + 1)
def try_dask_delayed_filter2D(pathimage, kernel, output_pathimage):
    client = Client()
    (collection, info) = build_collection_tile(pathimage)
    res = []
    timer = time.time()
    # Note: each delayed task is computed immediately inside the loop, so the
    # tiles are processed one at a time rather than as a single parallel graph.
    for i in range(len(collection)):
        e = dask.delayed(collection[i].filter2D)(kernel)
        # e.visualize(filename='graph_' + str(i) + '.svg')
        res.append(e.compute())
    timer = time.time() - timer
    with rasterio.open(output_pathimage, 'w', driver=info.driver,
                       width=info.width, height=info.height, count=info.count,
                       dtype=info.dtypes[0], transform=info.transform) as dst:
        for t in res:
            (x0, y0, x1, y1) = t.bounding_polygon.bounds
            (x0, y0, x1, y1) = (int(x0), int(y0), int(x1), int(y1))
            for i in info.indexes:
                dst.write(t.img[i - 1],
                          window=Window(y0, x0, y1 - y0, x1 - x0),
                          indexes=i)
    client.close()
    return timer
def try_spark_filter2D(pathimage, kernel, output_pathimage):
    # Timed variant using toLocalIterator: tiles stream back to the driver and
    # filter2D runs there sequentially, not in parallel on the executors.
    (collection, info) = build_collection_tile(pathimage)
    sc = SparkContext()
    timer = time.time()
    rdd = sc.parallelize(collection)
    collection_res = []
    for n in rdd.toLocalIterator():
        collection_res.append(n.filter2D(kernel))
    timer = time.time() - timer
    sc.stop()
    img = np.empty((info.count, info.height, info.width), dtype=info.dtypes[0])
    for tile in collection_res:
        (x0, y0, x1, y1) = tile.bounding_polygon.bounds
        (x0, y0, x1, y1) = (int(x0), int(y0), int(x1), int(y1))
        img[:, x0:x1, y0:y1] = tile.img
    with rasterio.open(output_pathimage, 'w', driver=info.driver,
                       width=info.width, height=info.height, count=info.count,
                       dtype=info.dtypes[0], crs=info.crs,
                       transform=info.transform) as res:
        for i in range(res.count):
            res.write(img[i], i + 1)
    return timer
def try_dask_filter2D(pathimage, kernel, output_pathimage, output_graphSVG=None, profiling=False):
    # Timed Dask bag variant; optionally dumps the task graph as an SVG.
    client = Client()
    # client.upload_file("tile/tile.py")
    (collection, info) = build_collection_tile(pathimage)
    timer = time.time()
    rdd = db.from_sequence(collection).map(lambda n: n.filter2D(kernel))
    timer_graph = time.time() - timer
    timer = time.time()
    collection2 = rdd.compute()
    timer_compute = time.time() - timer
    if output_graphSVG is not None:
        rdd.visualize(filename=output_graphSVG)
    with rasterio.open(output_pathimage, 'w', driver=info.driver,
                       width=info.width, height=info.height, count=info.count,
                       dtype=info.dtypes[0], transform=info.transform) as dst:
        for t in collection2:
            (x0, y0, x1, y1) = t.bounding_polygon.bounds
            (x0, y0, x1, y1) = (int(x0), int(y0), int(x1), int(y1))
            for i in info.indexes:
                dst.write(t.img[i - 1],
                          window=Window(y0, x0, y1 - y0, x1 - x0),
                          indexes=i)
    client.close()
    return (timer_graph, timer_compute)
def try_spark_filter2D(pathimage, kernel, output_pathimage):
    # Timed Spark variant with 2000x2000 tiles and extra driver memory.
    (collection, info) = build_collection_tile(pathimage, 2000, 2000)
    # SparkContext.setSystemProperty('spark.driver.memory', '8g')
    # SparkContext.setSystemProperty('spark.executor.memory', '6G')
    conf = SparkConf()
    conf.set('spark.driver.memory', '4G')
    sc = SparkContext(conf=conf)
    timer = time.time()
    rdd = sc.parallelize(collection)
    rdd = rdd.map(lambda n: n.filter2D(kernel))
    timer_graph = time.time() - timer
    timer = time.time()
    collection_res = rdd.collect()
    timer_compute = time.time() - timer
    sc.stop()
    img = np.empty((info.count, info.height, info.width), dtype=info.dtypes[0])
    for tile in collection_res:
        (x0, y0, x1, y1) = tile.bounding_polygon.bounds
        (x0, y0, x1, y1) = (int(x0), int(y0), int(x1), int(y1))
        img[:, x0:x1, y0:y1] = tile.img
    with rasterio.open(output_pathimage, 'w', driver=info.driver,
                       width=info.width, height=info.height, count=info.count,
                       dtype=info.dtypes[0], crs=info.crs,
                       transform=info.transform) as res:
        for i in range(res.count):
            res.write(img[i], i + 1)
    return (timer_graph, timer_compute)
def test_collection(data):
    (collection, info) = build_collection_tile(
        data.img_path, data.unit_height, data.unit_width)
    assert len(collection) == data.nb_expected_items
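
# Example driver: a minimal sketch showing how the timed Dask variant might be
# invoked with a classic 3x3 sharpening kernel. The kernel values and the
# input/output paths are illustrative assumptions, not part of the original
# source; the timed try_dask_filter2D (the variant returning two timers) is
# assumed to be the one in scope.
if __name__ == "__main__":
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]], dtype=np.float32)
    (timer_graph, timer_compute) = try_dask_filter2D(
        "data/input.tif", kernel, "data/output.tif")
    print("graph build: %.3f s, compute: %.3f s" % (timer_graph, timer_compute))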