def draw_weighted_point_map(spark):
    """Render four weighted point maps of NYC taxi pickups and save them as PNGs.

    The taxi CSV is loaded from a fixed path, cached and registered as the
    session-local temp view ``nyc_taxi``.  Pickup points inside a fixed
    polygon are then rendered four times, combining a single colour or a
    two-colour gradient (weight column ``c``) with a single stroke width or a
    stroke-width range (weight column ``s``).  Finally the view is dropped
    and the elapsed wall-clock time is printed.

    Args:
        spark: the SparkSession used to read the CSV and run the queries.
    """
    start_time = time.time()
    df = (
        spark.read.format("csv")
        .option("header", True)
        .option("delimiter", ",")
        .schema(
            "VendorID string, tpep_pickup_datetime timestamp, "
            "tpep_dropoff_datetime timestamp, passenger_count long, "
            "trip_distance double, pickup_longitude double, "
            "pickup_latitude double, dropoff_longitude double, "
            "dropoff_latitude double, fare_amount double, tip_amount double, "
            "total_amount double, buildingid_pickup long, "
            "buildingid_dropoff long, buildingtext_pickup string, "
            "buildingtext_dropoff string"
        )
        .load("file:///tmp/0_5M_nyc_taxi_and_building.csv")
        .cache()
    )
    df.createOrReplaceTempView("nyc_taxi")
    register_funcs(spark)

    # Spatial filter shared by all four queries.
    in_area = (
        "ST_Within(ST_Point(pickup_longitude, pickup_latitude), "
        "ST_GeomFromText('POLYGON ((-73.998427 40.730309, "
        "-73.954348 40.730309, -73.954348 40.780816 ,"
        "-73.998427 40.780816, -73.998427 40.730309))'))"
    )

    # (extra select columns, colour gradient, stroke-width bound, output file)
    cases = (
        # single color and single stroke width
        ("", ["#87CEEB"], [5], '/tmp/weighted_pointmap_0_0.png'),
        # multiple color and single stroke width
        (", tip_amount as c", ["#0000FF", "#FF0000"], [5],
         '/tmp/weighted_pointmap_1_0.png'),
        # single color and multiple stroke width
        (", fare_amount as s", ["#87CEEB"], [0, 10],
         '/tmp/weighted_pointmap_0_1.png'),
        # multiple color and multiple stroke width
        (", tip_amount as c, fare_amount as s", ["#0000FF", "#FF0000"],
         [0, 10], '/tmp/weighted_pointmap_1_1.png'),
    )

    for extra_columns, colors, size_bound, png_path in cases:
        points = spark.sql(
            "select ST_Point(pickup_longitude, pickup_latitude) as point"
            + extra_columns
            + " from nyc_taxi where "
            + in_area
        )
        vega = vega_weighted_pointmap(
            1024, 896,
            [-73.998427, 40.730309, -73.954348, 40.780816],
            colors, [0, 2], size_bound, 1.0, "EPSG:4326")
        save_png(weighted_pointmap(vega, points), png_path)

    spark.sql("show tables").show()
    # The view was registered with createOrReplaceTempView, i.e. it is a
    # session-local temp view; dropGlobalTempView would not remove it.
    spark.catalog.dropTempView("nyc_taxi")
    print("--- %s seconds ---" % (time.time() - start_time))
def draw_weighted_point_map(spark):
    """Render four weighted point maps of NYC taxi pickups and save them as PNGs.

    The CSV at the module-level ``data_path`` is loaded, cached, previewed
    with ``show`` and registered as the session-local temp view
    ``nyc_taxi``.  Pickup points inside a fixed polygon are rendered four
    times, combining a single colour or the ``blue_to_red`` gradient (weight
    column ``c``) with a single stroke width or a stroke-width range (weight
    column ``s``).  The view is dropped at the end.

    Args:
        spark: the SparkSession used to read the CSV and run the queries.
    """
    df = (
        spark.read.format("csv")
        .option("header", True)
        .option("delimiter", ",")
        .schema(
            "VendorID string, tpep_pickup_datetime timestamp, "
            "tpep_dropoff_datetime timestamp, passenger_count long, "
            "trip_distance double, pickup_longitude double, "
            "pickup_latitude double, dropoff_longitude double, "
            "dropoff_latitude double, fare_amount double, tip_amount double, "
            "total_amount double, buildingid_pickup long, "
            "buildingid_dropoff long, buildingtext_pickup string, "
            "buildingtext_dropoff string"
        )
        .load(data_path)
        .cache()
    )
    df.show(20, False)
    df.createOrReplaceTempView("nyc_taxi")
    register_funcs(spark)

    # Spatial filter shared by all four queries.
    in_area = (
        "ST_Within(ST_Point(pickup_longitude, pickup_latitude), "
        "'POLYGON ((-73.998427 40.730309, -73.954348 40.730309, "
        "-73.954348 40.780816 ,-73.998427 40.780816, "
        "-73.998427 40.730309))')"
    )

    # (extra select columns, colour spec, stroke-width bound, output file)
    cases = (
        # single color and single stroke width
        ("", "#87CEEB", [5], '/tmp/weighted_pointmap_0_0.png'),
        # multiple color and single stroke width
        (", tip_amount as c", "blue_to_red", [5],
         '/tmp/weighted_pointmap_1_0.png'),
        # single color and multiple stroke width
        (", fare_amount as s", "#87CEEB", [0, 10],
         '/tmp/weighted_pointmap_0_1.png'),
        # multiple color and multiple stroke width
        (", tip_amount as c, fare_amount as s", "blue_to_red", [0, 10],
         '/tmp/weighted_pointmap_1_1.png'),
    )

    for extra_columns, color, size_bound, png_path in cases:
        points = spark.sql(
            "select ST_Point(pickup_longitude, pickup_latitude) as point"
            + extra_columns
            + " from nyc_taxi where "
            + in_area
        )
        vega = vega_weighted_pointmap(
            1024, 896,
            [-73.998427, 40.730309, -73.954348, 40.780816],
            color, [0, 2], size_bound, 1.0, "EPSG:4326")
        save_png(weighted_pointmap(points, vega), png_path)

    spark.sql("show tables").show()
    # The view was registered with createOrReplaceTempView, i.e. it is a
    # session-local temp view; dropGlobalTempView would not remove it.
    spark.catalog.dropTempView("nyc_taxi")
def draw_world_weighted_point_map(spark):
    """Render the per-country COVID locations as a single-colour point map.

    Loads the CSV at the module-level ``country_csv``, registers it as the
    session-local temp view ``COVID_country``, builds one point per row,
    filters out rows whose point is ``'POINT (nan nan)'``, and writes the
    rendered map to ``./COVID_country_weighted_point_map1.png``.

    Args:
        spark: the SparkSession used to read the CSV and run the queries.
    """
    df = (
        spark.read.format("csv")
        .option("header", True)
        .option("delimiter", ",")
        .schema(
            "continent string, country string, locationId string, longitude double, latitude double,"
            "currentConfirmedCount int, confirmedCount int, suspectedCount int, curedCount int, deadCount int, "
            "updateTime timestamp")
        .load(country_csv)
        .cache()
    )
    df.createOrReplaceTempView("COVID_country")
    register_funcs(spark)

    # 1
    res1 = spark.sql(
        "select ST_Point(longitude, latitude) as point from COVID_country ")
    res1.createOrReplaceTempView("res1")
    # Rows with missing coordinates yield 'POINT (nan nan)'; exclude them.
    res1 = spark.sql("select * from res1 where point != 'POINT (nan nan)' ")
    res1.show(20, False)

    vega1 = vega_weighted_pointmap(
        3000, 2000,
        [-289.095983, -73.863121, 289.095983, 73.863121],
        "#EEEEEE", [2, 60], [6], 1.0, "EPSG:4326")
    res_png1 = weighted_pointmap(res1, vega1)
    save_png(res_png1, './COVID_country_weighted_point_map1.png')

    # The view was registered with createOrReplaceTempView, i.e. it is a
    # session-local temp view; dropGlobalTempView would not remove it.
    spark.catalog.dropTempView("COVID_country")
def draw_world_include_province_weighted_point_map(spark):
    """Render province-level COVID counts as a size-weighted point map.

    Loads the CSV at the module-level ``country_with_province_csv``,
    registers it as the session-local temp view ``COVID_country_province``,
    keeps rows whose ``LastUpdateTime`` contains ``03-29``, uses
    ``ConfirmedCount`` as the size weight ``s``, filters out rows whose
    point is ``'POINT (nan nan)'``, and writes the rendered map to
    ``./COVID_country_weighted_point_map2.png``.

    Args:
        spark: the SparkSession used to read the CSV and run the queries.
    """
    # 1
    df = (
        spark.read.format("csv")
        .option("header", True)
        .option("delimiter", ",")
        .schema(
            "Province string, Country string, Longitude double, Latitude double, ConfirmedCount int,"
            "DeadCount int, CuredCount int, LastUpdateTime string")
        .load(country_with_province_csv)
        .cache()
    )
    df.createOrReplaceTempView("COVID_country_province")
    register_funcs(spark)

    res2 = spark.sql(
        "select ST_Point(Longitude, Latitude) as point, ConfirmedCount as s from COVID_country_province "
        "where LastUpdateTime like '%03-29%'")
    res2.createOrReplaceTempView("res2")
    # Rows with missing coordinates yield 'POINT (nan nan)'; exclude them.
    res2 = spark.sql("select * from res2 where point != 'POINT (nan nan)' ")

    vega2 = vega_weighted_pointmap(
        3000, 2000,
        [-289.095983, -73.863121, 289.095983, 73.863121],
        "#F0356D", [2, 60], [6, 60], 1.0, "EPSG:4326")
    res_png2 = weighted_pointmap(res2, vega2)
    save_png(res_png2, './COVID_country_weighted_point_map2.png')

    # The view was registered with createOrReplaceTempView, i.e. it is a
    # session-local temp view; dropGlobalTempView would not remove it.
    spark.catalog.dropTempView("COVID_country_province")
def db_query():
    """
    /db/query handler

    Expects a JSON body with an ``id`` (looked up in ``db.CENTER``) and a
    ``query`` object holding ``type`` and ``sql``.  For ``type == 'sql'``
    the rows returned by ``run_for_json`` are decoded and returned as a list.
    For the rendering types (``point``, ``heat``, ``choropleth``,
    ``weighted``, ``icon``) ``query.params`` is also required; the SQL
    result is rendered and the renderer's output is returned.

    The query type and its rendering parameters are validated *before* the
    SQL is executed, so an unsupported type or malformed ``params`` never
    triggers a query and is reported as a JSON error rather than an
    unhandled exception.

    Returns:
        A ``jsonify`` response: ``status='success', code=200, data=content``
        on success, otherwise ``status='error', code=-1`` with a message.
    """
    payload = request.json
    log.INSTANCE.info('POST /db/query: {}'.format(payload))

    if not utils.check_json(payload, 'id') \
            or not utils.check_json(payload, 'query') \
            or not utils.check_json(payload['query'], 'type') \
            or not utils.check_json(payload['query'], 'sql'):
        return jsonify(status='error', code=-1, message='query format error')

    query = payload['query']
    query_sql = query['sql']
    query_type = query['type']

    content = {'sql': query_sql, 'err': False}

    db_instance = db.CENTER.get(str(payload['id']), None)
    if db_instance is None:
        return jsonify(status="error",
                       code=-1,
                       message='there is no database whose id equal to ' +
                       str(payload['id']))

    if query_type == 'sql':
        res = db_instance.run_for_json(query_sql)
        content['result'] = [json.loads(row) for row in res]
        return jsonify(status="success", code=200, data=content)

    if not utils.check_json(query, 'params'):
        return jsonify(status='error', code=-1, message='query format error')

    # Only parameter extraction/conversion is guarded: a missing key or a
    # non-numeric width/height/opacity is a client format error.
    try:
        plan = _plan_visual_query(query_type, query['params'])
    except (KeyError, TypeError, ValueError):
        return jsonify(status='error', code=-1, message='query format error')

    if plan is None:
        return jsonify(status="error",
                       code=-1,
                       message='{} not support'.format(query_type))

    build_vega, render, vega_args = plan
    # Run the SQL only once the request is known to be renderable.
    res = db_instance.run(query_sql)
    content['result'] = render(build_vega(*vega_args), res)
    return jsonify(status="success", code=200, data=content)


def _plan_visual_query(query_type, params):
    """Pick the vega builder, renderer and builder arguments for a query type.

    Returns ``(vega_builder, render_func, vega_args)`` or ``None`` when
    ``query_type`` is not a supported rendering type.  Raises ``KeyError``,
    ``TypeError`` or ``ValueError`` when ``params`` lacks a required entry or
    holds a value that cannot be converted to the expected number.
    """
    if query_type == 'point':
        opts = params['point']
        return vega_pointmap, pointmap, (
            int(params['width']),
            int(params['height']),
            opts['bounding_box'],
            int(opts['point_size']),
            opts['point_color'],
            float(opts['opacity']),
            opts['coordinate_system'],
        )
    if query_type == 'heat':
        opts = params['heat']
        return vega_heatmap, heatmap, (
            int(params['width']),
            int(params['height']),
            opts['bounding_box'],
            float(opts['map_zoom_level']),
            opts['coordinate_system'],
            opts['aggregation_type'],
        )
    if query_type == 'choropleth':
        opts = params['choropleth']
        return vega_choroplethmap, choroplethmap, (
            int(params['width']),
            int(params['height']),
            opts['bounding_box'],
            opts['color_gradient'],
            opts['color_bound'],
            float(opts['opacity']),
            opts['coordinate_system'],
            opts['aggregation_type'],
        )
    if query_type == 'weighted':
        opts = params['weighted']
        return vega_weighted_pointmap, weighted_pointmap, (
            int(params['width']),
            int(params['height']),
            opts['bounding_box'],
            opts['color_gradient'],
            opts['color_bound'],
            opts['size_bound'],
            float(opts['opacity']),
            opts['coordinate_system'],
        )
    if query_type == 'icon':
        opts = params['icon']
        return vega_icon, icon_viz, (
            int(params['width']),
            int(params['height']),
            opts['bounding_box'],
            opts['icon_path'],
            opts['coordinate_system'],
        )
    return None
def draw_china_weighted_point_map(spark):
    """Render three weighted point maps of the China COVID data set.

    Loads the CSV at the module-level ``china_csv`` and registers it as the
    session-local temp view ``COVID_china`` (dropping any stale view of that
    name first).  Points inside a fixed bounding polygon are rendered as:

    1. a single-colour map,
    2. a ``blue_to_red`` map coloured by ``provinceConfirmedCount`` (``c``),
    3. a ``blue_to_red`` map coloured and sized by ``provinceConfirmedCount``
       (``c`` and ``s``).

    Each map is written to ``./COVID_china_weighted_point_map<N>.png`` and
    the view is dropped at the end.

    Args:
        spark: the SparkSession used to read the CSV and run the queries.
    """
    df = (
        spark.read.format("csv")
        .option("header", True)
        .option("delimiter", ",")
        .schema(
            "continent string, country string, province string, provinceLocationId string, "
            "provinceCurrentConfirmedCount int , provinceConfirmedCount int, provinceSuspectedCount int,"
            "provinceCuredCount int, provinceDeadCount int, cityName string, longitude double, latitude double,"
            "cityLocationId string, cityCurrentConfirmedCount int, cityConfirmedCount int, citySuspectedCount int,"
            "cityCuredCount int, cityDeadCount int, updateTime timestamp")
        .load(china_csv)
        .cache()
    )
    # The view is a session-local temp view (createOrReplaceTempView below),
    # so it must be dropped with dropTempView; dropGlobalTempView would not
    # find it.
    spark.catalog.dropTempView("COVID_china")
    df.createOrReplaceTempView("COVID_china")
    register_funcs(spark)

    # Spatial filter shared by all three queries.
    in_china = (
        "ST_Within(ST_Point(longitude, latitude), "
        "'POLYGON ((71.604264 17.258977, 137.319408 17.258977, "
        "137.319408 53.808533, 71.604264 53.808533, "
        "71.604264 17.258977))')"
    )
    # Rows with missing coordinates yield 'POINT (nan nan)'; each result is
    # re-queried through a temp view to exclude them.

    # 1
    res1 = spark.sql(
        "select ST_Point(longitude, latitude) as point from COVID_china where "
        + in_china
    )
    res1.createOrReplaceTempView("res1")
    res1 = spark.sql("select * from res1 where point != 'POINT (nan nan)' ")
    vega1 = vega_weighted_pointmap(
        1024, 896,
        [71.604264, 17.258977, 137.319408, 53.808533],
        "#EEEEEE", [2, 60], [6], 1.0, "EPSG:4326")
    res_png1 = weighted_pointmap(res1, vega1)
    save_png(res_png1, './COVID_china_weighted_point_map1.png')

    # 2
    res2 = spark.sql(
        "select ST_Point(longitude, latitude) as point, provinceConfirmedCount as c from COVID_china "
        "where " + in_china
    )
    res2.createOrReplaceTempView("res2")
    res2 = spark.sql("select * from res2 where point != 'POINT (nan nan)' ")
    vega2 = vega_weighted_pointmap(
        1024, 896,
        [71.604264, 17.258977, 137.319408, 53.808533],
        "blue_to_red", [2, 1000], [6], 1.0, "EPSG:4326")
    res_png2 = weighted_pointmap(res2, vega2)
    save_png(res_png2, './COVID_china_weighted_point_map2.png')

    # 3
    res3 = spark.sql(
        "select ST_Point(longitude, latitude) as point, provinceConfirmedCount as c, "
        "provinceConfirmedCount as s from COVID_china "
        "where " + in_china
    )
    res3.createOrReplaceTempView("res3")
    res3 = spark.sql("select * from res3 where point != 'POINT (nan nan)' ")
    vega3 = vega_weighted_pointmap(
        3000, 2000,
        [71.604264, 17.258977, 137.319408, 53.808533],
        "blue_to_red", [2, 1000], [5, 1000], 1.0, "EPSG:4326")
    res_png3 = weighted_pointmap(res3, vega3)
    save_png(res_png3, './COVID_china_weighted_point_map3.png')

    spark.catalog.dropTempView("COVID_china")
save_png(res, '/tmp/arctern_pointmap.png') # 在指定地理区域(经度范围:-73.991504 至 -73.945155;纬度范围:40.770759 至 40.783434)中随机选取 200 个坐标点,并将 fare_amount 作为颜色权重、total_amount 作为大小权重。 pickup_sql = f"select st_point(pickup_longitude, pickup_latitude) as point, fare_amount as color_weight, total_amount as size_weight from nyc_taxi where (pickup_longitude between {pos1[0]} and {pos2[0]}) and (pickup_latitude between {pos1[1]} and {pos2[1]}) limit {limit_num}" pickup_df = spark.sql(pickup_sql) # 根据查询结果绘制带权点图图层。点的颜色根据 color_weight 在 "#115f9a" ~ "#d0f400" 之间变化,点的大小根据 size_weight 在 3 ~ 15 之间变化。 vega = vega_weighted_pointmap( 1024, 384, bounding_box=[pos1[0], pos1[1], pos2[0], pos2[1]], color_gradient=["#115f9a", "#d0f400"], color_bound=[1, 50], size_bound=[3, 15], opacity=1.0, coordinate_system="EPSG:4326") res = weighted_pointmap(vega, pickup_df) save_png(res, "/tmp/arctern_weighted_pointmap.png") # 在指定地理区域(经度范围:-73.991504 至 -73.945155;纬度范围:40.770759 至 40.783434)中随机选取 200 个坐标点,并将 fare_amount 作为热力值。 pickup_sql = f"select st_point(pickup_longitude, pickup_latitude) as point, fare_amount as weight from nyc_taxi where (pickup_longitude between {pos1[0]} and {pos2[0]}) and (pickup_latitude between {pos1[1]} and {pos2[1]}) limit {limit_num}" pickup_df = spark.sql(pickup_sql) # 根据查询结果绘制热力图图层。 vega = vega_heatmap(1024, 384, bounding_box=[pos1[0], pos1[1], pos2[0], pos2[1]], map_zoom_level=13.0, coordinate_system="EPSG:4326") res = heatmap(vega, pickup_df) save_png(res, "/tmp/arctern_heatmap.png") # 在指定地理区域(经度范围:-73.991504 至 -73.945155;纬度范围:40.770759 至 40.783434)中随机选取 200 个坐标点,并将 fare_amount 作为颜色权重。