Python weighted_pointmap示例，arctern_pyspark.weighted_pointmap Python示例

示例#1

0

显示文件

def draw_weighted_point_map(spark):
    """Render four weighted point maps of NYC taxi pickups and save them as PNGs.

    The CSV at ``/tmp/0_5M_nyc_taxi_and_building.csv`` is loaded, cached and
    registered as the session-local temp view ``nyc_taxi``. Pickup points
    inside a fixed polygon are then drawn once for every combination of
    single/multiple color and single/multiple stroke width, each image being
    written to ``/tmp/weighted_pointmap_<color>_<size>.png``. The elapsed
    wall-clock time is printed at the end.

    Args:
        spark: Spark session used to read the CSV and run the SQL queries.
    """
    start_time = time.time()
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "VendorID string, tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, trip_distance double, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, fare_amount double, tip_amount double, total_amount double, buildingid_pickup long, buildingid_dropoff long, buildingtext_pickup string, buildingtext_dropoff string"
    ).load("file:///tmp/0_5M_nyc_taxi_and_building.csv").cache()
    df.createOrReplaceTempView("nyc_taxi")

    # WHERE clause shared by all four queries: keep only the pickups that
    # fall inside the polygon matching the rendered bounding box.
    region_filter = (
        "where ST_Within(ST_Point(pickup_longitude, pickup_latitude),  "
        "ST_GeomFromText('POLYGON ((-73.998427 40.730309, -73.954348 40.730309, "
        "-73.954348 40.780816 ,-73.998427 40.780816, -73.998427 40.730309))'))"
    )

    # (file suffix, extra select columns, color gradient, size bound)
    # `c` carries the color weight and `s` the size weight
    # (presumably picked up by the renderer by position -- confirm against
    # the arctern API).
    variants = [
        # single color and single stroke width
        ("0_0", "", ["#87CEEB"], [5]),
        # multiple color and single stroke width
        ("1_0", ", tip_amount as c", ["#0000FF", "#FF0000"], [5]),
        # single color and multiple stroke width
        ("0_1", ", fare_amount as s", ["#87CEEB"], [0, 10]),
        # multiple color and multiple stroke width
        ("1_1", ", tip_amount as c, fare_amount as s",
         ["#0000FF", "#FF0000"], [0, 10]),
    ]

    try:
        register_funcs(spark)

        for suffix, extra_columns, color_gradient, size_bound in variants:
            points = spark.sql(
                "select ST_Point(pickup_longitude, pickup_latitude) as point"
                + extra_columns + " from nyc_taxi " + region_filter)
            vega = vega_weighted_pointmap(
                1024, 896, [-73.998427, 40.730309, -73.954348, 40.780816],
                color_gradient, [0, 2], size_bound, 1.0, "EPSG:4326")
            image = weighted_pointmap(vega, points)
            save_png(image, '/tmp/weighted_pointmap_' + suffix + '.png')

        spark.sql("show tables").show()
    finally:
        # createOrReplaceTempView registers a session-local view, so it has
        # to be removed with dropTempView; dropGlobalTempView only looks at
        # global temp views and would leave `nyc_taxi` registered.
        spark.catalog.dropTempView("nyc_taxi")
    print("--- %s seconds ---" % (time.time() - start_time))

示例#2

0

显示文件

def draw_weighted_point_map(spark):
    """Render four weighted point maps of NYC taxi pickups and save them as PNGs.

    The CSV at ``data_path`` (a name defined elsewhere in the module) is
    loaded, cached, previewed and registered as the session-local temp view
    ``nyc_taxi``. Pickup points inside a fixed polygon are then drawn once
    for every combination of single/multiple color and single/multiple
    stroke width, each image being written to
    ``/tmp/weighted_pointmap_<color>_<size>.png``.

    Args:
        spark: Spark session used to read the CSV and run the SQL queries.
    """
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "VendorID string, tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, trip_distance double, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, fare_amount double, tip_amount double, total_amount double, buildingid_pickup long, buildingid_dropoff long, buildingtext_pickup string, buildingtext_dropoff string"
    ).load(data_path).cache()
    df.show(20, False)
    df.createOrReplaceTempView("nyc_taxi")

    # WHERE clause shared by all four queries: keep only the pickups that
    # fall inside the polygon matching the rendered bounding box.
    region_filter = (
        "where ST_Within(ST_Point(pickup_longitude, pickup_latitude),  "
        "'POLYGON ((-73.998427 40.730309, -73.954348 40.730309, "
        "-73.954348 40.780816 ,-73.998427 40.780816, -73.998427 40.730309))')"
    )

    # (file suffix, extra select columns, color style, size bound)
    # `c` carries the color weight and `s` the size weight
    # (presumably picked up by the renderer by position -- confirm against
    # the arctern API).
    variants = [
        # single color and single stroke width
        ("0_0", "", "#87CEEB", [5]),
        # multiple color and single stroke width
        ("1_0", ", tip_amount as c", "blue_to_red", [5]),
        # single color and multiple stroke width
        ("0_1", ", fare_amount as s", "#87CEEB", [0, 10]),
        # multiple color and multiple stroke width
        ("1_1", ", tip_amount as c, fare_amount as s", "blue_to_red",
         [0, 10]),
    ]

    try:
        register_funcs(spark)

        for suffix, extra_columns, color_style, size_bound in variants:
            points = spark.sql(
                "select ST_Point(pickup_longitude, pickup_latitude) as point"
                + extra_columns + " from nyc_taxi " + region_filter)
            vega = vega_weighted_pointmap(
                1024, 896, [-73.998427, 40.730309, -73.954348, 40.780816],
                color_style, [0, 2], size_bound, 1.0, "EPSG:4326")
            image = weighted_pointmap(points, vega)
            save_png(image, '/tmp/weighted_pointmap_' + suffix + '.png')

        spark.sql("show tables").show()
    finally:
        # createOrReplaceTempView registers a session-local view, so it has
        # to be removed with dropTempView; dropGlobalTempView only looks at
        # global temp views and would leave `nyc_taxi` registered.
        spark.catalog.dropTempView("nyc_taxi")

示例#3

0

显示文件

文件： draw_point_map.py 项目： xiaocai2333/COVID-19-demo

def draw_world_weighted_point_map(spark):
    """Render the per-country COVID locations as a single-color point map.

    Loads ``country_csv`` (a name defined elsewhere in the module) into the
    session-local temp view ``COVID_country``, turns every row into a point,
    drops the ``POINT (nan nan)`` rows and writes the rendered image to
    ``./COVID_country_weighted_point_map1.png``.

    The intermediate view ``res1`` stays registered in the session.

    Args:
        spark: Spark session used to read the CSV and run the SQL queries.
    """
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "continent string, country string, locationId string, longitude double, latitude double,"
        "currentConfirmedCount int, confirmedCount int, suspectedCount int, curedCount int, deadCount int, "
        "updateTime timestamp").load(country_csv).cache()

    df.createOrReplaceTempView("COVID_country")

    try:
        register_funcs(spark)

        # 1
        all_points = spark.sql(
            "select ST_Point(longitude, latitude) as point from COVID_country ")
        all_points.createOrReplaceTempView("res1")
        # Skip rows whose point came out as NaN (presumably rows without
        # usable coordinates -- confirm against the data).
        res1 = spark.sql("select * from res1 where point != 'POINT (nan nan)' ")
        res1.show(20, False)
        vega1 = vega_weighted_pointmap(
            3000, 2000, [-289.095983, -73.863121, 289.095983, 73.863121],
            "#EEEEEE", [2, 60], [6], 1.0, "EPSG:4326")
        res_png1 = weighted_pointmap(res1, vega1)
        save_png(res_png1, './COVID_country_weighted_point_map1.png')
    finally:
        # createOrReplaceTempView registers a session-local view, so it has
        # to be removed with dropTempView; dropGlobalTempView only looks at
        # global temp views and would leave `COVID_country` registered.
        spark.catalog.dropTempView("COVID_country")

示例#4

0

显示文件

文件： draw_point_map.py 项目： xiaocai2333/COVID-19-demo

def draw_world_include_province_weighted_point_map(spark):
    """Render province-level COVID counts as a size-weighted point map.

    Loads ``country_with_province_csv`` (a name defined elsewhere in the
    module) into the session-local temp view ``COVID_country_province``,
    keeps the rows whose ``LastUpdateTime`` contains ``03-29``, uses
    ``ConfirmedCount`` as the ``s`` weight column, drops the
    ``POINT (nan nan)`` rows and writes the rendered image to
    ``./COVID_country_weighted_point_map2.png``.

    The intermediate view ``res2`` stays registered in the session.

    Args:
        spark: Spark session used to read the CSV and run the SQL queries.
    """
    # 1
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "Province string, Country string, Longitude double, Latitude double, ConfirmedCount int,"
        "DeadCount int, CuredCount int, LastUpdateTime string").load(
            country_with_province_csv).cache()

    df.createOrReplaceTempView("COVID_country_province")

    try:
        register_funcs(spark)

        weighted_points = spark.sql(
            "select ST_Point(Longitude, Latitude) as point, ConfirmedCount as s from COVID_country_province "
            "where LastUpdateTime like '%03-29%'")
        weighted_points.createOrReplaceTempView("res2")
        # Skip rows whose point came out as NaN (presumably rows without
        # usable coordinates -- confirm against the data).
        res2 = spark.sql("select * from res2 where point != 'POINT (nan nan)' ")
        vega2 = vega_weighted_pointmap(
            3000, 2000, [-289.095983, -73.863121, 289.095983, 73.863121],
            "#F0356D", [2, 60], [6, 60], 1.0, "EPSG:4326")
        res_png2 = weighted_pointmap(res2, vega2)
        save_png(res_png2, './COVID_country_weighted_point_map2.png')
    finally:
        # createOrReplaceTempView registers a session-local view, so it has
        # to be removed with dropTempView; dropGlobalTempView only looks at
        # global temp views and would leave the view registered.
        spark.catalog.dropTempView("COVID_country_province")

示例#5

0

显示文件

文件： service.py 项目： quziyan/arctern

def db_query():
    """
    /db/query handler

    Expects a JSON body with an ``id`` (looked up in ``db.CENTER``) and a
    ``query`` object holding ``type`` and ``sql``.

    * ``type == 'sql'``: the statement is run with ``run_for_json`` and every
      returned row is parsed with ``json.loads``.
    * ``type`` in ``'point'``, ``'heat'``, ``'choropleth'``, ``'weighted'``,
      ``'icon'``: ``query.params`` is required; the matching vega spec is
      built from it, the statement is run, and the result is rendered.

    The vega spec is built *before* the statement is executed, so an
    unsupported type or incomplete params never triggers a database query.

    Returns:
        A JSON response. On success ``status='success'``, ``code=200`` and
        ``data`` holds ``sql``, ``err`` and ``result``. On a malformed
        request, an unknown database id or an unsupported type,
        ``status='error'`` and ``code=-1`` with an explanatory message.
    """
    payload = request.json
    log.INSTANCE.info('POST /db/query: {}'.format(payload))

    if not utils.check_json(payload, 'id') \
            or not utils.check_json(payload, 'query') \
            or not utils.check_json(payload['query'], 'type') \
            or not utils.check_json(payload['query'], 'sql'):
        return jsonify(status='error', code=-1, message='query format error')

    query = payload['query']
    query_sql = query['sql']
    query_type = query['type']

    content = {'sql': query_sql, 'err': False}

    db_instance = db.CENTER.get(str(payload['id']), None)
    if db_instance is None:
        return jsonify(status="error",
                       code=-1,
                       message='there is no database whose id equal to ' +
                       str(payload['id']))

    if query_type == 'sql':
        rows = db_instance.run_for_json(query_sql)
        content['result'] = [json.loads(row) for row in rows]
        return jsonify(status="success", code=200, data=content)

    # Every remaining type renders a map layer and needs rendering params.
    if not utils.check_json(query, 'params'):
        return jsonify(status='error',
                       code=-1,
                       message='query format error')
    query_params = query['params']
    width = int(query_params['width'])
    height = int(query_params['height'])

    # Resolve the vega spec and its renderer first; the SQL is only executed
    # once the request is known to be renderable.
    if query_type == 'point':
        layer = query_params['point']
        vega = vega_pointmap(width, height,
                             layer['bounding_box'],
                             int(layer['point_size']),
                             layer['point_color'],
                             float(layer['opacity']),
                             layer['coordinate_system'])
        render = pointmap
    elif query_type == 'heat':
        layer = query_params['heat']
        vega = vega_heatmap(width, height,
                            layer['bounding_box'],
                            float(layer['map_zoom_level']),
                            layer['coordinate_system'],
                            layer['aggregation_type'])
        render = heatmap
    elif query_type == 'choropleth':
        layer = query_params['choropleth']
        vega = vega_choroplethmap(width, height,
                                  layer['bounding_box'],
                                  layer['color_gradient'],
                                  layer['color_bound'],
                                  float(layer['opacity']),
                                  layer['coordinate_system'],
                                  layer['aggregation_type'])
        render = choroplethmap
    elif query_type == 'weighted':
        layer = query_params['weighted']
        vega = vega_weighted_pointmap(width, height,
                                      layer['bounding_box'],
                                      layer['color_gradient'],
                                      layer['color_bound'],
                                      layer['size_bound'],
                                      float(layer['opacity']),
                                      layer['coordinate_system'])
        render = weighted_pointmap
    elif query_type == 'icon':
        layer = query_params['icon']
        vega = vega_icon(width, height,
                         layer['bounding_box'],
                         layer['icon_path'],
                         layer['coordinate_system'])
        render = icon_viz
    else:
        return jsonify(status="error",
                       code=-1,
                       message='{} not support'.format(query_type))

    res = db_instance.run(query_sql)
    content['result'] = render(vega, res)

    return jsonify(status="success", code=200, data=content)

示例#6

0

显示文件

文件： draw_point_map.py 项目： xiaocai2333/COVID-19-demo

def draw_china_weighted_point_map(spark):
    """Render three weighted point maps of the China COVID data set.

    Loads ``china_csv`` (a name defined elsewhere in the module) into the
    session-local temp view ``COVID_china`` and, for points inside a fixed
    polygon, writes:

    1. ``./COVID_china_weighted_point_map1.png`` -- single color, fixed size;
    2. ``./COVID_china_weighted_point_map2.png`` -- ``provinceConfirmedCount``
       as the ``c`` weight column, fixed size;
    3. ``./COVID_china_weighted_point_map3.png`` -- ``provinceConfirmedCount``
       as both the ``c`` and the ``s`` weight column.

    The intermediate views ``res1``, ``res2`` and ``res3`` stay registered in
    the session.

    Args:
        spark: Spark session used to read the CSV and run the SQL queries.
    """
    df = spark.read.format("csv").option("header", True).option(
        "delimiter", ","
    ).schema(
        "continent string, country string, province string, provinceLocationId string, "
        "provinceCurrentConfirmedCount int , provinceConfirmedCount int, provinceSuspectedCount int,"
        "provinceCuredCount int, provinceDeadCount int, cityName string, longitude double, latitude double,"
        "cityLocationId string, cityCurrentConfirmedCount int, cityConfirmedCount int, citySuspectedCount int,"
        "cityCuredCount int, cityDeadCount int, updateTime timestamp").load(
            china_csv).cache()

    # Clear a view left over from an earlier run. The view is session-local
    # (see createOrReplaceTempView below), so dropTempView is the call that
    # can actually find it; dropGlobalTempView only looks at global views.
    spark.catalog.dropTempView("COVID_china")

    df.createOrReplaceTempView("COVID_china")

    try:
        register_funcs(spark)

        # 1
        plain_points = spark.sql(
            "select ST_Point(longitude, latitude) as point from COVID_china where ST_Within(ST_Point(longitude, latitude), 'POLYGON ((71.604264 17.258977, 137.319408 17.258977, 137.319408 53.808533, 71.604264 53.808533, 71.604264 17.258977))')"
        )
        plain_points.createOrReplaceTempView("res1")
        # Skip rows whose point came out as NaN (presumably rows without
        # usable coordinates -- confirm against the data).
        res1 = spark.sql("select * from res1 where point != 'POINT (nan nan)' ")

        vega1 = vega_weighted_pointmap(
            1024, 896, [71.604264, 17.258977, 137.319408, 53.808533], "#EEEEEE",
            [2, 60], [6], 1.0, "EPSG:4326")
        res_png1 = weighted_pointmap(res1, vega1)
        save_png(res_png1, './COVID_china_weighted_point_map1.png')

        # 2
        colored_points = spark.sql(
            "select ST_Point(longitude, latitude) as point, provinceConfirmedCount as c from COVID_china "
            "where ST_Within(ST_Point(longitude, latitude), "
            "'POLYGON ((71.604264 17.258977, 137.319408 17.258977, 137.319408 53.808533,"
            " 71.604264 53.808533, 71.604264 17.258977))')")

        colored_points.createOrReplaceTempView("res2")
        res2 = spark.sql("select * from res2 where point != 'POINT (nan nan)' ")

        vega2 = vega_weighted_pointmap(
            1024, 896, [71.604264, 17.258977, 137.319408, 53.808533],
            "blue_to_red", [2, 1000], [6], 1.0, "EPSG:4326")

        res_png2 = weighted_pointmap(res2, vega2)
        save_png(res_png2, './COVID_china_weighted_point_map2.png')

        # 3
        sized_points = spark.sql(
            "select ST_Point(longitude, latitude) as point, provinceConfirmedCount as c, "
            "provinceConfirmedCount as s from COVID_china "
            "where ST_Within(ST_Point(longitude, latitude), "
            "'POLYGON ((71.604264 17.258977, 137.319408 17.258977, 137.319408 53.808533,"
            " 71.604264 53.808533, 71.604264 17.258977))')")
        sized_points.createOrReplaceTempView("res3")
        res3 = spark.sql("select * from res3 where point != 'POINT (nan nan)' ")

        vega3 = vega_weighted_pointmap(
            3000, 2000, [71.604264, 17.258977, 137.319408, 53.808533],
            "blue_to_red", [2, 1000], [5, 1000], 1.0, "EPSG:4326")

        res_png3 = weighted_pointmap(res3, vega3)
        save_png(res_png3, './COVID_china_weighted_point_map3.png')
    finally:
        # Session-local view: remove it with dropTempView even when
        # rendering fails part-way through.
        spark.catalog.dropTempView("COVID_china")

示例#7

0

显示文件

# Save the image held in `res` (produced earlier in the script, outside this excerpt).
save_png(res, '/tmp/arctern_pointmap.png')

# Select limit_num points inside the target region, using fare_amount as the color weight and total_amount as the size weight.
# The original note gives the region as longitude -73.991504 to -73.945155, latitude 40.770759 to 40.783434, and the count as 200;
# pos1, pos2 and limit_num are set earlier in the script. The note says "randomly", but the query simply applies LIMIT.
pickup_sql = f"select st_point(pickup_longitude, pickup_latitude) as point, fare_amount as color_weight, total_amount as size_weight from nyc_taxi where (pickup_longitude between {pos1[0]} and {pos2[0]}) and (pickup_latitude between {pos1[1]} and {pos2[1]}) limit {limit_num}"
pickup_df = spark.sql(pickup_sql)
# Draw a weighted point map layer from the query result. Point color varies between "#115f9a" and "#d0f400" according to color_weight, and point size varies between 3 and 15 according to size_weight.
vega = vega_weighted_pointmap(
    1024,
    384,
    bounding_box=[pos1[0], pos1[1], pos2[0], pos2[1]],
    color_gradient=["#115f9a", "#d0f400"],
    color_bound=[1, 50],
    size_bound=[3, 15],
    opacity=1.0,
    coordinate_system="EPSG:4326")
res = weighted_pointmap(vega, pickup_df)
save_png(res, "/tmp/arctern_weighted_pointmap.png")

# Select limit_num points inside the same region (same caveats as above), using fare_amount as the heat value.
pickup_sql = f"select st_point(pickup_longitude, pickup_latitude) as point, fare_amount as weight from nyc_taxi where (pickup_longitude between {pos1[0]} and {pos2[0]}) and (pickup_latitude between {pos1[1]} and {pos2[1]}) limit {limit_num}"
pickup_df = spark.sql(pickup_sql)
# Draw a heat map layer from the query result.
vega = vega_heatmap(1024,
                    384,
                    bounding_box=[pos1[0], pos1[1], pos2[0], pos2[1]],
                    map_zoom_level=13.0,
                    coordinate_system="EPSG:4326")
res = heatmap(vega, pickup_df)
save_png(res, "/tmp/arctern_heatmap.png")

# Select 200 points inside the same region (longitude -73.991504 to -73.945155; latitude 40.770759 to 40.783434), using fare_amount as the color weight. (The code for this step lies beyond this excerpt.)