import os

from lib.geolib_helper import get_shp_filepath, load_normalize_gov_shp_data

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.dirname(CURRENT_DIR)

if __name__ == '__main__':
    # ---- input locations -------------------------------------------------
    data_dir = os.path.join(BASE_DIR, 'data')
    area_dimension_table_filepath = os.path.join(
        data_dir, 'area_dimension_table.csv')
    village_shp_dirpath = os.path.join(data_dir,
                                       'taiwan_twd97_map_data_village')
    village_shp_filepath = get_shp_filepath(village_shp_dirpath)

    # ---- output locations (both live in the aggregated data mart) --------
    data_mart_dirpath = os.path.join(data_dir, 'aggregated-data_mart')
    output_village_filepath = os.path.join(
        data_mart_dirpath, 'simplified_taipei_village.geojson')
    output_township_filepath = os.path.join(
        data_mart_dirpath, 'simplified_taipei_township.geojson')

    # Load the village-level layer through the project helper.
    village_gpd = load_normalize_gov_shp_data(village_shp_filepath)

    # Keep only the rows whose county is Taipei City and index them by
    # village code; drop=False keeps 'village_code' available as a column too.
    is_taipei = village_gpd['county_chinese_name'] == '臺北市'
    taipei_village_gpd = village_gpd[is_taipei].set_index('village_code',
                                                          drop=False)

    # Merge the village geometries into one geometry per township code.
    township_columns = ['township_code', 'geometry']
    township_source_gpd = taipei_village_gpd[township_columns]
    taipei_township_gpd = township_source_gpd.dissolve(by='township_code')

    # Tolerance equivalent to 0.05 km, using the rule of thumb that one
    # coordinate unit is about 111 km.
    # NOTE(review): that ratio matches one degree of latitude, so this
    # presumably assumes the layer's coordinates are in degrees - confirm.
    km_per_coordinate_unit = 111
    tolerance = 0.05 / km_per_coordinate_unit
# Example #2
# 0
import os
import pandas as pd
pd.set_option("mode.chained_assignment", None)

from lib.geolib_helper import get_shp_filepath, load_normalize_gov_shp_data

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.dirname(CURRENT_DIR)


if __name__ == '__main__':
    # Locate the village-level shapefile under <BASE_DIR>/data.
    data_dir = os.path.join(BASE_DIR, 'data')
    village_shp_filepath = get_shp_filepath(
        os.path.join(data_dir, 'taiwan_twd97_map_data_village'))

    # The same CSV name is used for two destinations: one next to this
    # script and one inside the normalized data warehouse directory.
    filename = 'taipei_village_centroid_distance_km_matrix.csv'
    output_filepath_list = [
        os.path.join(CURRENT_DIR, 'data', filename),
        os.path.join(data_dir, 'normalized-data_warehouse', filename),
    ]

    # Identical to the first entry of output_filepath_list; derived from it
    # so the two cannot drift apart, and kept under its own name in case
    # later code refers to it.
    output_filepath = output_filepath_list[0]

    villages_shp_gdp = load_normalize_gov_shp_data(village_shp_filepath)

    # Keep only the Taipei City rows. The explicit copy makes the result an
    # independent frame, so adding a column below does not write into a
    # slice of villages_shp_gdp (the chained-assignment hazard).
    taipei_area_only = (
        villages_shp_gdp[villages_shp_gdp['county_chinese_name'] == '臺北市']
        .copy()
        .set_index('village_code')
    )

    # Store each row's geometry centroid alongside the original geometry.
    taipei_area_only['centroid'] = taipei_area_only.centroid

    # Starts empty here; the code that fills it is not visible in this view.
    distance_km_matrix_dict = {}
    for index, row in taipei_area_only.iterrows():
# Example #3
# 0
    # Directories holding the county / township / village shapefiles.
    county_data_dirpah = os.path.join(data_dir, 'taiwan_twd97_map_data_county')
    town_data_dirpath = os.path.join(
        data_dir, 'taiwan_twd97_map_data_township')
    village_data_dirpath = os.path.join(
        data_dir, 'taiwan_twd97_map_data_village')

    # The table is written under the same file name to two places: next to
    # this script, and in the normalized data warehouse directory so that
    # non-technical users can look it up more easily.
    filename = 'area_dimension_table.csv'
    local_output_filepath = os.path.join(CURRENT_DIR, 'data', filename)
    warehouse_output_filepath = os.path.join(
        data_dir, 'normalized-data_warehouse', filename)
    output_filepath_list = [local_output_filepath, warehouse_output_filepath]

    # Read the three layers in county, township, village order.
    county_shp, town_shp, village_shp = (
        gpd.read_file(get_shp_filepath(shp_dirpath))
        for shp_dirpath in (county_data_dirpah, town_data_dirpath,
                            village_data_dirpath)
    )

    # Only the attribute columns are needed for the dimension table, so the
    # geometry column is removed from each layer.
    county_df = county_shp.drop(columns='geometry')
    town_df = town_shp.drop(columns='geometry')
    village_df = village_shp.drop(columns='geometry')

    # Run try_fix_encoding over every column whose label contains 'name'
    # (case-insensitive) to repair badly encoded Chinese characters.
    # Presumably the helper modifies each frame in place, since its return
    # value is not used - verify against change_on_multiple_columns.
    for attribute_df in (town_df, county_df, village_df):
        change_on_multiple_columns(attribute_df,
                                   lambda x: 'name' in x.casefold(),
                                   try_fix_encoding)