def draw_heat_map(spark):
    """Render a heatmap of NYC taxi pickups and save it to /tmp/heatmap.png.

    Loads the 0.5M-row NYC taxi CSV, keeps pickup points inside a fixed
    Manhattan bounding polygon, renders an Arctern heatmap, and prints the
    elapsed wall-clock time.

    :param spark: active SparkSession; Arctern UDFs are registered on it here.
    """
    start_time = time.time()
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "VendorID string, tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, trip_distance double, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, fare_amount double, tip_amount double, total_amount double, buildingid_pickup long, buildingid_dropoff long, buildingtext_pickup string, buildingtext_dropoff string"
    ).load("file:///tmp/0_5M_nyc_taxi_and_building.csv").cache()
    df.createOrReplaceTempView("nyc_taxi")
    register_funcs(spark)
    # Pickup points within the polygon, weighted by passenger_count.
    res = spark.sql(
        "select ST_Point(pickup_longitude, pickup_latitude) as point, passenger_count as w from nyc_taxi where ST_Within(ST_Point(pickup_longitude, pickup_latitude), ST_GeomFromText('POLYGON ((-73.998427 40.730309, -73.954348 40.730309, -73.954348 40.780816 ,-73.998427 40.780816, -73.998427 40.730309))'))"
    )
    res.show()
    vega = vega_heatmap(1024, 896, [-73.998427, 40.730309, -73.954348, 40.780816], 10.0, 'EPSG:4326')
    res = heatmap(vega, res)
    save_png(res, '/tmp/heatmap.png')

    spark.sql("show tables").show()
    # BUG FIX: the view was registered with createOrReplaceTempView, so it must
    # be dropped with dropTempView; dropGlobalTempView only targets the
    # global_temp database and silently did nothing here.
    spark.catalog.dropTempView("nyc_taxi")
    print("--- %s seconds ---" % (time.time() - start_time))
def draw_heat_map(spark):
    """Render a heatmap via explicit transform/projection steps.

    Loads the NYC taxi CSV, transforms pickup points from EPSG:4326 to
    EPSG:3857, projects them into a 1024x896 pixel space, then renders the
    result with the VegaHeatMap builder and saves it to /tmp/heatmap.png.

    :param spark: active SparkSession; Arctern UDFs are registered on it here.
    """
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "VendorID string, tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, trip_distance double, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, fare_amount double, tip_amount double, total_amount double, buildingid_pickup long, buildingid_dropoff long, buildingtext_pickup string, buildingtext_dropoff string"
    ).load("file:///tmp/0_5M_nyc_taxi_and_building.csv").cache()
    df.show(20, False)
    df.createOrReplaceTempView("nyc_taxi")
    res = spark.sql(
        "select pickup_latitude as x, pickup_longitude as y, passenger_count as w from nyc_taxi"
    )
    res.printSchema()
    res.createOrReplaceTempView("pickup")
    register_funcs(spark)
    # Reproject pickup coordinates to Web Mercator.
    res = spark.sql(
        "select ST_Transform(ST_Point(x, y), 'EPSG:4326','EPSG:3857' ) as pickup_point, w from pickup"
    )
    res.show(20, False)
    res.createOrReplaceTempView("project")
    # Map the mercator coordinates onto a 1024x896 pixel canvas.
    res = spark.sql(
        "select Projection(pickup_point, 'POINT (4534000 -12510000)', 'POINT (4538000 -12513000)', 1024, 896) as point, w from project"
    )
    res.show(20, False)
    vega_heat_map = VegaHeatMap(300, 200, 10.0)
    vega = vega_heat_map.build()
    res = heatmap(res, vega)
    save_png(res, '/tmp/heatmap.png')

    spark.sql("show tables").show()
    # BUG FIX: "nyc_taxi" was registered with createOrReplaceTempView, so it
    # must be dropped with dropTempView (dropGlobalTempView was a no-op).
    # NOTE(review): the "pickup" and "project" temp views are left registered,
    # matching the original behavior — confirm whether they should be dropped.
    spark.catalog.dropTempView("nyc_taxi")
def draw_heat_map(spark):
    """Render a heatmap of NYC taxi pickups and save it to /tmp/heatmap.png.

    Loads the taxi CSV from ``data_path`` (module-level), keeps pickups inside
    a fixed Manhattan polygon, and renders an Arctern heatmap.

    :param spark: active SparkSession; Arctern UDFs are registered on it here.
    """
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "VendorID string, tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, trip_distance double, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, fare_amount double, tip_amount double, total_amount double, buildingid_pickup long, buildingid_dropoff long, buildingtext_pickup string, buildingtext_dropoff string"
    ).load(data_path).cache()
    df.show(20, False)
    df.createOrReplaceTempView("nyc_taxi")
    register_funcs(spark)
    # NOTE(review): ST_Within is given a raw WKT string here, while the other
    # variant of this function wraps it in ST_GeomFromText — confirm that this
    # UDF version accepts a plain WKT literal.
    res = spark.sql(
        "select ST_Point(pickup_longitude, pickup_latitude) as point, passenger_count as w from nyc_taxi where ST_Within(ST_Point(pickup_longitude, pickup_latitude), 'POLYGON ((-73.998427 40.730309, -73.954348 40.730309, -73.954348 40.780816 ,-73.998427 40.780816, -73.998427 40.730309))')"
    )
    vega = vega_heatmap(1024, 896, 10.0, [-73.998427, 40.730309, -73.954348, 40.780816], 'EPSG:4326')
    res = heatmap(res, vega)
    save_png(res, '/tmp/heatmap.png')

    spark.sql("show tables").show()
    # BUG FIX: the view was registered with createOrReplaceTempView, so it must
    # be dropped with dropTempView (dropGlobalTempView was a no-op here).
    spark.catalog.dropTempView("nyc_taxi")
def db_query():
    """/db/query handler.

    Expects a JSON body with ``id`` (database id) and ``query`` containing
    ``type`` and ``sql``.  For ``type == 'sql'`` the statement result is
    returned as parsed JSON rows; for visualization types ('point', 'heat',
    'choropleth', 'weighted', 'icon') a ``params`` object is also required and
    the rendered image data is returned.

    :return: flask JSON response — ``status='error'`` on malformed input or
        unknown type, ``status='success'`` with ``data`` otherwise.
    """
    log.INSTANCE.info('POST /db/query: {}'.format(request.json))

    if not utils.check_json(request.json, 'id') \
            or not utils.check_json(request.json, 'query') \
            or not utils.check_json(request.json['query'], 'type') \
            or not utils.check_json(request.json['query'], 'sql'):
        return jsonify(status='error', code=-1, message='query format error')

    query_sql = request.json['query']['sql']
    query_type = request.json['query']['type']

    content = {}
    content['sql'] = query_sql
    content['err'] = False

    db_instance = db.CENTER.get(str(request.json['id']), None)
    if db_instance is None:
        return jsonify(status="error", code=-1,
                       message='there is no database whose id equal to ' + str(request.json['id']))

    if query_type == 'sql':
        res = db_instance.run_for_json(query_sql)
        data = []
        for row in res:
            obj = json.loads(row)
            data.append(obj)
        content['result'] = data
    else:
        if not utils.check_json(request.json['query'], 'params'):
            return jsonify(status='error', code=-1, message='query format error')
        query_params = request.json['query']['params']
        res = db_instance.run(query_sql)
        # ROBUSTNESS: the params come from untrusted client JSON; a missing key
        # or non-numeric width/height/opacity previously raised and produced an
        # unhandled 500 — report it as a format error instead.
        try:
            if query_type == 'point':
                vega = vega_pointmap(int(query_params['width']),
                                     int(query_params['height']),
                                     query_params['point']['bounding_box'],
                                     int(query_params['point']['point_size']),
                                     query_params['point']['point_color'],
                                     float(query_params['point']['opacity']),
                                     query_params['point']['coordinate_system'])
                data = pointmap(vega, res)
                content['result'] = data
            elif query_type == 'heat':
                vega = vega_heatmap(int(query_params['width']),
                                    int(query_params['height']),
                                    query_params['heat']['bounding_box'],
                                    float(query_params['heat']['map_zoom_level']),
                                    query_params['heat']['coordinate_system'],
                                    query_params['heat']['aggregation_type'])
                data = heatmap(vega, res)
                content['result'] = data
            elif query_type == 'choropleth':
                vega = vega_choroplethmap(
                    int(query_params['width']),
                    int(query_params['height']),
                    query_params['choropleth']['bounding_box'],
                    query_params['choropleth']['color_gradient'],
                    query_params['choropleth']['color_bound'],
                    float(query_params['choropleth']['opacity']),
                    query_params['choropleth']['coordinate_system'],
                    query_params['choropleth']['aggregation_type'])
                data = choroplethmap(vega, res)
                content['result'] = data
            elif query_type == 'weighted':
                vega = vega_weighted_pointmap(
                    int(query_params['width']),
                    int(query_params['height']),
                    query_params['weighted']['bounding_box'],
                    query_params['weighted']['color_gradient'],
                    query_params['weighted']['color_bound'],
                    query_params['weighted']['size_bound'],
                    float(query_params['weighted']['opacity']),
                    query_params['weighted']['coordinate_system'])
                data = weighted_pointmap(vega, res)
                content['result'] = data
            elif query_type == 'icon':
                vega = vega_icon(int(query_params['width']),
                                 int(query_params['height']),
                                 query_params['icon']['bounding_box'],
                                 query_params['icon']['icon_path'],
                                 query_params['icon']['coordinate_system'])
                data = icon_viz(vega, res)
                content['result'] = data
            else:
                return jsonify(status="error", code=-1,
                               message='{} not support'.format(query_type))
        except (KeyError, TypeError, ValueError):
            return jsonify(status='error', code=-1, message='query format error')

    return jsonify(status="success", code=200, data=content)
def db_query():
    """/db/query handler.

    Expects a JSON body with ``id`` and ``query`` containing ``type`` and
    ``sql``.  For ``type == 'sql'`` the statement result is returned as parsed
    JSON rows; for visualization types ('point', 'heat', 'choropleth') a
    ``params`` object is also required and the rendered image data is returned.

    :return: flask JSON response — ``status='error'`` on malformed input or
        unknown type, ``status='success'`` with ``data`` otherwise.
    """
    if not utils.check_json(request.json, 'id') \
            or not utils.check_json(request.json, 'query') \
            or not utils.check_json(request.json['query'], 'type') \
            or not utils.check_json(request.json['query'], 'sql'):
        return jsonify(status='error', code=-1, message='query format error')

    query_sql = request.json['query']['sql']
    query_type = request.json['query']['type']

    content = {}
    content['sql'] = query_sql
    content['err'] = False

    if query_type == 'sql':
        res = spark.Spark.run_for_json(query_sql)
        data = []
        for row in res:
            obj = json.loads(row)
            data.append(obj)
        content['result'] = data
    else:
        if not utils.check_json(request.json['query'], 'params'):
            return jsonify(status='error', code=-1, message='query format error')
        query_params = request.json['query']['params']
        res = spark.Spark.run(query_sql)
        # ROBUSTNESS: the params come from untrusted client JSON; a missing key
        # or non-numeric width/height/opacity previously raised and produced an
        # unhandled 500 — report it as a format error instead.
        try:
            if query_type == 'point':
                vega = vega_pointmap(int(query_params['width']),
                                     int(query_params['height']),
                                     query_params['point']['bounding_box'],
                                     int(query_params['point']['stroke_width']),
                                     query_params['point']['stroke'],
                                     float(query_params['point']['opacity']),
                                     query_params['point']['coordinate'])
                data = pointmap(res, vega)
                content['result'] = data
            elif query_type == 'heat':
                vega = vega_heatmap(int(query_params['width']),
                                    int(query_params['height']),
                                    float(query_params['heat']['map_scale']),
                                    query_params['heat']['bounding_box'],
                                    query_params['heat']['coordinate'])
                data = heatmap(res, vega)
                content['result'] = data
            elif query_type == 'choropleth':
                vega = vega_choroplethmap(
                    int(query_params['width']),
                    int(query_params['height']),
                    query_params['choropleth']['bounding_box'],
                    query_params['choropleth']['color_style'],
                    query_params['choropleth']['rule'],
                    float(query_params['choropleth']['opacity']),
                    query_params['choropleth']['coordinate'])
                data = choroplethmap(res, vega)
                content['result'] = data
            else:
                return jsonify(status="error", code=-1,
                               message='{} not support'.format(query_type))
        except (KeyError, TypeError, ValueError):
            return jsonify(status='error', code=-1, message='query format error')

    return jsonify(status="success", code=200, data=content)
# NOTE(review): this chunk begins mid-expression — the opening
# `vega = vega_weighted_pointmap(...` call is outside this view, so the first
# line below is the tail of that call.
size_bound=[3, 15], opacity=1.0, coordinate_system="EPSG:4326")
res = weighted_pointmap(vega, pickup_df)
save_png(res, "/tmp/arctern_weighted_pointmap.png")

# Select 200 points within the specified geographic area (longitude
# -73.991504 to -73.945155; latitude 40.770759 to 40.783434), using
# fare_amount as the heat value.
pickup_sql = f"select st_point(pickup_longitude, pickup_latitude) as point, fare_amount as weight from nyc_taxi where (pickup_longitude between {pos1[0]} and {pos2[0]}) and (pickup_latitude between {pos1[1]} and {pos2[1]}) limit {limit_num}"
pickup_df = spark.sql(pickup_sql)

# Draw a heatmap layer from the query result.
vega = vega_heatmap(1024, 384, bounding_box=[pos1[0], pos1[1], pos2[0], pos2[1]], map_zoom_level=13.0, coordinate_system="EPSG:4326")
res = heatmap(vega, pickup_df)
save_png(res, "/tmp/arctern_heatmap.png")

# Select 200 pickup buildings within the same area (longitude -73.991504 to
# -73.945155; latitude 40.770759 to 40.783434), using fare_amount as the
# color weight.
pickup_sql = f"select ST_GeomFromText(buildingtext_pickup) as buildings, fare_amount as color_weight from nyc_taxi where (pickup_longitude between {pos1[0]} and {pos2[0]}) and (pickup_latitude between {pos1[1]} and {pos2[1]}) and (buildingtext_pickup!='') limit {limit_num}"
pickup_df = spark.sql(pickup_sql)

# Draw a choropleth layer from the query result.  The fill color of each
# outline varies with color_weight between "#115f9a" and "#d0f400".
vega = vega_choroplethmap(1024, 384, bounding_box=[pos1[0], pos1[1], pos2[0], pos2[1]], color_gradient=["#115f9a", "#d0f400"], color_bound=[2.5, 5], opacity=1.0, coordinate_system="EPSG:4326")
res = choroplethmap(vega, pickup_df)
save_png(res, "/tmp/arctern_choroplethmap.png")
def run_test_heat_map(spark):
    """Regression-test heatmap rendering determinism across several configs.

    For each (width, height, map_scale) case: renders a baseline plus two more
    heatmaps from the same vega spec, saves all three PNGs, then asserts that
    the re-renders differ from the baseline by no more than a per-case
    threshold.

    :param spark: active SparkSession; Arctern UDFs are registered on it here.
    """
    df = spark.read.format("csv").option("header", True).option("delimiter", ",").schema(
        "VendorID string, tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, "
        "trip_distance double, pickup_longitude double, pickup_latitude double, dropoff_longitude double, "
        "dropoff_latitude double, fare_amount double, tip_amount double, total_amount double, buildingid_pickup long, "
        "buildingid_dropoff long, buildingtext_pickup string, buildingtext_dropoff string").load(
            file_path).cache()
    df.createOrReplaceTempView("nyc_taxi")
    register_funcs(spark)
    res = spark.sql(
        "select ST_Point(pickup_longitude, pickup_latitude) as point, passenger_count as w from nyc_taxi where ST_Within(ST_Point(pickup_longitude, pickup_latitude), 'POLYGON ((-73.998427 40.730309, -73.954348 40.730309, -73.954348 40.780816 ,-73.998427 40.780816, -73.998427 40.730309))')")

    bounding_box = [-73.998427, 40.730309, -73.954348, 40.780816]
    # (case id, width, height, map_scale, allowed png diff ratio)
    cases = [
        (1, 1024, 896, 10.0, 0.1),
        (2, 1024, 896, 0.0, 0.1),
        (3, 1024, 896, 12.0, 0.15),
        (4, 1024, 896, 5.5, 0.1),
        (5, 200, 200, 10.0, 0.2),
    ]

    # Render and save baseline plus two repeat renders per case.
    for case_id, width, height, map_scale, _ in cases:
        vega = vega_heatmap(width, height, map_scale, bounding_box, 'EPSG:4326')
        baseline = heatmap(res, vega)
        repeat_1 = heatmap(res, vega)
        repeat_2 = heatmap(res, vega)
        save_png(baseline, png_path + "heat_map_nyc_%d.png" % case_id)
        save_png(repeat_1, png_path + "test_heat_map_nyc_%d-1.png" % case_id)
        save_png(repeat_2, png_path + "test_heat_map_nyc_%d-2.png" % case_id)

    # BUG FIX: the view was registered with createOrReplaceTempView, so it must
    # be dropped with dropTempView (dropGlobalTempView was a no-op here).
    spark.catalog.dropTempView("nyc_taxi")

    # Compare the repeat renders against their baselines (after cleanup, as in
    # the original ordering).
    for case_id, _, _, _, threshold in cases:
        baseline_png = png_path + "heat_map_nyc_%d.png" % case_id
        assert run_diff_png(baseline_png, png_path + "test_heat_map_nyc_%d-1.png" % case_id, threshold)
        assert run_diff_png(baseline_png, png_path + "test_heat_map_nyc_%d-2.png" % case_id, threshold)