# Script fragment: builds the file paths and the Taipei-only village and
# township frames that feed the "simplified_taipei_*.geojson" outputs.
# The code that simplifies and writes those files is outside this span.
import os

from lib.geolib_helper import get_shp_filepath, load_normalize_gov_shp_data

# Directory containing this script, and its parent directory.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.dirname(CURRENT_DIR)

if __name__ == '__main__':
    # --- input / output locations, all rooted at <BASE_DIR>/data ---
    data_dir = os.path.join(BASE_DIR, 'data')
    area_dimension_table_filepath = os.path.join(
        data_dir, 'area_dimension_table.csv')
    village_shp_filepath = get_shp_filepath(
        os.path.join(data_dir, 'taiwan_twd97_map_data_village'))
    data_mart_dirpath = os.path.join(data_dir, 'aggregated-data_mart')
    output_village_filepath = os.path.join(
        data_mart_dirpath, 'simplified_taipei_village.geojson')
    output_township_filepath = os.path.join(
        data_mart_dirpath, 'simplified_taipei_township.geojson')

    # NOTE(review): presumably returns a GeoDataFrame with normalized column
    # names (county_chinese_name, village_code, township_code, geometry are
    # read below) -- confirm against lib.geolib_helper.
    village_gpd = load_normalize_gov_shp_data(village_shp_filepath)

    # Keep only rows whose county is '臺北市' (Taipei City) and index them by
    # village_code while keeping the column (drop=False).  The index is set
    # with a non-mutating call so the boolean-mask selection of village_gpd
    # is never modified in place.
    taipei_village_gpd = village_gpd[
        village_gpd['county_chinese_name'] == '臺北市'
    ].set_index('village_code', drop=False)

    # Merge the village geometries into one geometry per township_code.
    taipei_township_gpd = taipei_village_gpd[
        ['township_code', 'geometry']
    ].dissolve(by='township_code')

    # Tolerance of about 0.05 km, expressed in coordinate units: one degree
    # is roughly 111 km.
    # NOTE(review): assumes the geometries are in degrees -- confirm the CRS
    # produced by load_normalize_gov_shp_data.
    tolerance = 0.05 / 111
# Script fragment: prepares the inputs for a Taipei village centroid distance
# matrix ('taipei_village_centroid_distance_km_matrix.csv').  Only the setup
# and the header of the main loop are visible here; the loop body that fills
# distance_km_matrix_dict lies outside this span.
import os
import pandas as pd
# Disables pandas' chained-assignment warning for the whole process.
# NOTE(review): presumably added to quiet the warning raised by assigning the
# 'centroid' column on the filtered frame below; taking an explicit .copy()
# of that selection would avoid needing a global switch -- confirm.
pd.set_option("mode.chained_assignment", None)
from lib.geolib_helper import get_shp_filepath, load_normalize_gov_shp_data

# Directory containing this script, and its parent directory.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.dirname(CURRENT_DIR)

if __name__ == '__main__':
    data_dir = os.path.join(BASE_DIR, 'data')
    # NOTE(review): rebuilds <BASE_DIR>/data instead of reusing data_dir.
    village_shp_filepath = get_shp_filepath(
        os.path.join(BASE_DIR, 'data', 'taiwan_twd97_map_data_village'))

    # The same file name is targeted in two places: next to this script and
    # in the shared normalized-data_warehouse directory.
    filename = 'taipei_village_centroid_distance_km_matrix.csv'
    output_filepath_list = [
        os.path.join(CURRENT_DIR, 'data', filename),
        os.path.join(data_dir, 'normalized-data_warehouse', filename)
    ]
    # NOTE(review): resolves to the same path as output_filepath_list[0];
    # whether it is still used cannot be seen from this span.
    output_filepath = os.path.join(
        CURRENT_DIR, 'data', 'taipei_village_centroid_distance_km_matrix.csv')

    # NOTE(review): presumably returns a GeoDataFrame with normalized column
    # names -- confirm against lib.geolib_helper.
    villages_shp_gdp = load_normalize_gov_shp_data(village_shp_filepath)

    # Keep only rows whose county is '臺北市' (Taipei City), indexed by
    # village_code (the column is dropped, since drop defaults to True).
    taipei_area_only = villages_shp_gdp[
        villages_shp_gdp['county_chinese_name'] == '臺北市']
    taipei_area_only.set_index('village_code', inplace=True)

    # Store each row's centroid in its own column; this assignment targets a
    # filtered selection, which is what the chained-assignment warning is about.
    taipei_area_only['centroid'] = taipei_area_only.centroid

    # Accumulator for the matrix; populated by the loop below (body not
    # visible in this span).
    distance_km_matrix_dict = {}
    for index, row in taipei_area_only.iterrows():
# Script fragment: loads the county / township / village shapefiles and turns
# them into plain attribute tables for 'area_dimension_table.csv'.
# data_dir, CURRENT_DIR, gpd, get_shp_filepath, change_on_multiple_columns and
# try_fix_encoding are defined outside this span.

# Source directories, one per administrative level.
county_data_dirpah = os.path.join(data_dir, 'taiwan_twd97_map_data_county')
town_data_dirpath = os.path.join(data_dir, 'taiwan_twd97_map_data_township')
village_data_dirpath = os.path.join(data_dir, 'taiwan_twd97_map_data_village')

# The table is targeted at two locations; the copy under
# normalized-data_warehouse makes lookup easier for non-technical users.
filename = 'area_dimension_table.csv'
output_filepath_list = [
    os.path.join(output_dirpath, filename)
    for output_dirpath in (
        os.path.join(CURRENT_DIR, 'data'),
        os.path.join(data_dir, 'normalized-data_warehouse'),
    )
]

# Read the three shapefiles in county, township, village order.
county_shp, town_shp, village_shp = [
    gpd.read_file(get_shp_filepath(shp_dirpath))
    for shp_dirpath in (
        county_data_dirpah, town_data_dirpath, village_data_dirpath)
]

# Drop the geometry column so only the dimension attributes remain.
county_df = county_shp.drop(columns='geometry')
town_df = town_shp.drop(columns='geometry')
village_df = village_shp.drop(columns='geometry')


def _is_name_column(column_label):
    """Return True when the column label contains 'name', ignoring case."""
    return 'name' in column_label.casefold()


# Repair badly encoded Chinese text in every name-like column.
# NOTE(review): change_on_multiple_columns presumably applies
# try_fix_encoding in place to each matching column (its return value is not
# used here) -- confirm against the helper.
for attribute_df in (town_df, county_df, village_df):
    change_on_multiple_columns(
        attribute_df, _is_name_column, try_fix_encoding)