def col_list_val_to_row(pndDf, colWithLists, geomCol=None, epsg=None):
    """
    Convert a dataframe:

        | col_a | col_b | col_c
      0 |   X   |   X   |   1
      1 |   X   |   X   | [2,3]

    To:

        | col_a | col_b | col_c
      0 |   X   |   X   |   1
      1 |   X   |   X   |   2
      2 |   X   |   X   |   3
    """

    def desmembrate(row, row_acc, target_col):
        if not isinstance(row[target_col], list):
            row_acc.append(row.to_dict())

        else:
            for geom in row[target_col]:
                new_row = row.to_dict()
                new_row[target_col] = geom
                row_acc.append(new_row)

    new_rows = []
    pndDf.apply(lambda x: desmembrate(x, new_rows, colWithLists), axis=1)

    # Convert again to DataFrame
    if geomCol and epsg:
        from glass.g.it.pd import df_to_geodf

        return df_to_geodf(new_rows, geomCol, epsg)

    else:
        import pandas

        return pandas.DataFrame(new_rows)
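# Usage sketch (illustrative, not part of the module's API): a tiny
# hypothetical DataFrame showing how col_list_val_to_row turns a column
# holding list values into one row per list element.
def _example_col_list_val_to_row():
    import pandas

    df = pandas.DataFrame({
        'col_a' : ['X', 'X'],
        'col_b' : ['X', 'X'],
        'col_c' : [1, [2, 3]]
    })

    # The result has three rows, with col_c equal to 1, 2 and 3
    return col_list_val_to_row(df, 'col_c')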
def lst_prod_by_cell_and_year(shp, id_col, year, outshp,
                              platform="Sentinel-2", processingl='Level-2A',
                              epsg=32629):
    """
    Get a list of images:
    * one for each grid cell in shp;
    * one for each month of one year - the chosen image will be the one
      with the smallest area occupied by clouds;

    total_images = grid_number * number_months_year
    """

    from glass.g.rd.shp import shp_to_obj
    from glass.ng.pd    import merge_df
    from glass.g.wt.shp import df_to_shp
    from glass.g.it.pd  import df_to_geodf

    # NOTE: assumption - lst_prod (the Copernicus/SciHub product search
    # helper) is assumed to live in glass.g.acq.stl; adjust this import
    # to match your glass version
    from glass.g.acq.stl import lst_prod

    # Last day of each month (February is fixed at 28 days, so leap
    # years are not handled)
    months = {
        '01' : '31', '02' : '28', '03' : '31', '04' : '30',
        '05' : '31', '06' : '30', '07' : '31', '08' : '31',
        '09' : '30', '10' : '31', '11' : '30', '12' : '31'
    }

    # Open SHP
    grid = shp_to_obj(shp, srs_to=4326)

    def get_grid_id(row):
        row['cellid'] = row.title.split('_')[5][1:]
        return row

    # Search for images
    dfs = []
    for idx, cell in grid.iterrows():
        for k in months:
            start = "{}{}01".format(str(year), k)
            end   = "{}{}{}".format(str(year), k, months[k])

            # Before March 2018, Level-2A products were pilot products,
            # published with processing level 'Level-2Ap'
            if year == 2018 and processingl == 'Level-2A':
                if k == '01' or k == '02':
                    plevel = 'Level-2Ap'
                else:
                    plevel = processingl
            else:
                plevel = processingl

            prod = lst_prod(
                cell.geometry.wkt, start, end,
                platname=platform, procLevel=plevel
            )

            if not prod.shape[0]:
                continue

            # Get area in square kilometers
            prod = prod.to_crs('EPSG:{}'.format(str(epsg)))
            prod['areav'] = prod.geometry.area / 1000000

            # We only want images covering more than 70% of the tile
            # (a Sentinel-2 tile covers 10 000 km2, hence the
            # 7 000 km2 threshold)
            prod = prod[prod.areav >= 7000]

            # Get Cell ID
            prod = prod.apply(lambda x: get_grid_id(x), axis=1)

            # Filter by Cell ID
            prod = prod[prod.cellid == cell[id_col]]

            # Sort by cloud cover and date
            prod = prod.sort_values(
                ['cloudcoverpercentage', 'ingestiondate'],
                ascending=[True, True]
            )

            # Keep only the image with the least cloud cover
            prod = prod.head(1)

            dfs.append(prod)

    fdf = merge_df(dfs)
    fdf = df_to_geodf(fdf, 'geometry', epsg)

    df_to_shp(fdf, outshp)

    return outshp
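# Usage sketch (illustrative only): every path and column name below is
# hypothetical, and running it requires Copernicus/SciHub credentials
# configured for glass.
def _example_lst_prod_by_cell_and_year():
    # id_col values are expected to match the tile ID encoded in the
    # product title (e.g. a 'T29TNE' title token becomes '29TNE')
    return lst_prod_by_cell_and_year(
        '/tmp/s2_cells.shp', 'cellid', 2019,
        '/tmp/s2_best_images.shp'
    )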
def join_attr_by_distance(mainTable, joinTable, workGrass, epsg_code, output):
    """
    Find the nearest feature in joinTable and join its attributes to
    mainTable

    Uses GRASS GIS to find near lines.
    """

    import os
    from glass.g.wenv.grs import run_grass
    from glass.g.rd.shp   import shp_to_obj
    from glass.g.it.pd    import df_to_geodf
    from glass.g.wt.shp   import df_to_shp
    from glass.pys.oss    import fprop

    # Create GRASS GIS Location
    grassBase = run_grass(workGrass, location='join_loc', srs=epsg_code)

    import grass.script as grass
    import grass.script.setup as gsetup
    gsetup.init(grassBase, workGrass, 'join_loc', 'PERMANENT')

    # Import some GRASS GIS tools
    from glass.g.gp.prox import grs_near as near
    from glass.g.it.shp  import shp_to_grs, grs_to_shp

    # Import data into GRASS GIS
    grsMain = shp_to_grs(mainTable, fprop(mainTable, 'fn', forceLower=True))
    grsJoin = shp_to_grs(joinTable, fprop(joinTable, 'fn', forceLower=True))

    # Get the distance from each feature of mainTable to the nearest
    # feature of the join table
    near(grsMain, grsJoin, nearCatCol="tocat", nearDistCol="todistance")

    # Export data from GRASS GIS
    ogrMain = grs_to_shp(grsMain, os.path.join(
        workGrass, 'join_loc', grsMain + '_grs.shp'), None, asMultiPart=True)
    ogrJoin = grs_to_shp(grsJoin, os.path.join(
        workGrass, 'join_loc', grsJoin + '_grs.shp'), None, asMultiPart=True)

    dfMain = shp_to_obj(ogrMain)
    dfJoin = shp_to_obj(ogrJoin)

    # Join the attributes of the nearest feature via the "tocat" column
    dfResult = dfMain.merge(dfJoin, how='inner',
                            left_on='tocat', right_on='cat')

    dfResult.drop(["geometry_y", "cat_y"], axis=1, inplace=True)
    dfResult.rename(columns={"cat_x" : "cat_grass"}, inplace=True)

    # GRASS GIS categories are 1-based; turn them into 0-based indexes
    dfResult["tocat"]     = dfResult["tocat"] - 1
    dfResult["cat_grass"] = dfResult["cat_grass"] - 1

    dfResult = df_to_geodf(dfResult, "geometry_x", epsg_code)

    df_to_shp(dfResult, output)

    return output
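# Usage sketch (illustrative only): hypothetical inputs; workGrass must
# be an existing folder where the 'join_loc' GRASS GIS Location can be
# created.
def _example_join_attr_by_distance():
    return join_attr_by_distance(
        '/tmp/schools.shp',       # hypothetical main layer
        '/tmp/roads.shp',         # hypothetical layer with attributes to join
        '/tmp/grass_ws',          # hypothetical GRASS workspace
        3763,                     # hypothetical EPSG code of both layers
        '/tmp/schools_roads.shp'
    )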
def joinLines_by_spatial_rel_raster(mainLines, mainId, joinLines,
                                    joinCol, outfile, epsg):
    """
    Join attributes based on spatial overlap.
    A raster-based approach
    """

    import os
    import pandas
    from glass.g.rd.shp    import shp_to_obj
    from glass.g.wt.shp    import df_to_shp
    from glass.g.gp.ext    import shpext_to_boundshp
    from glass.g.dp.torst  import shp_to_rst
    from glass.g.it.pd     import df_to_geodf
    from glass.g.wenv.grs  import run_grass
    from glass.ng.pd.joins import join_dfs
    from glass.ng.pd.agg   import df_groupBy
    from glass.pys.oss     import fprop, mkdir

    workspace = mkdir(os.path.join(
        os.path.dirname(mainLines), 'tmp_dt'))

    # Create boundary file
    boundary = shpext_to_boundshp(
        mainLines, os.path.join(workspace, "bound.shp"), epsg)

    boundRst = shp_to_rst(
        boundary, None, 5, -99,
        os.path.join(workspace, "rst_base.tif"), epsg=epsg, api='gdal')

    # Start GRASS GIS Session
    gbase = run_grass(workspace, location="grs_loc", srs=boundRst)

    import grass.script as grass
    import grass.script.setup as gsetup
    gsetup.init(gbase, workspace, "grs_loc", "PERMANENT")

    from glass.g.rst.local import combine
    from glass.g.prop.rst  import get_rst_report_data
    from glass.g.it.shp    import shp_to_grs, grs_to_shp
    # This import shadows the GDAL-based shp_to_rst imported above
    from glass.g.dp.torst  import grsshp_to_grsrst as shp_to_rst

    # Add data to GRASS GIS
    mainVector = shp_to_grs(mainLines, fprop(mainLines, 'fn', forceLower=True))
    joinVector = shp_to_grs(joinLines, fprop(joinLines, 'fn', forceLower=True))

    # Rasterize both line layers
    mainRst = shp_to_rst(mainVector, mainId, f"rst_{mainVector}")
    joinRst = shp_to_rst(joinVector, joinCol, f"rst_{joinVector}")

    # Combine the two rasters: each output cell gets a category
    # identifying the pair (mainId value, joinCol value)
    combRst = combine(mainRst, joinRst, "combine_rst", api="pygrass")

    combine_data = get_rst_report_data(combRst, UNITS="c")

    combDf = pandas.DataFrame(combine_data, columns=[
        "comb_cat", "rst_1", "rst_2", "ncells"])

    # Keep only the cells where both rasters overlap
    combDf = combDf[combDf["rst_2"] != '0']
    combDf["ncells"] = combDf["ncells"].astype(int)

    # For each main line, get the join category with the largest overlap
    gbdata = df_groupBy(combDf, ["rst_1"], "MAX", "ncells")

    fTable = join_dfs(gbdata, combDf, ["rst_1", "ncells"], ["rst_1", "ncells"])

    fTable["rst_2"] = fTable["rst_2"].astype(int)
    # Break ties by taking the smallest join category
    fTable = df_groupBy(fTable, ["rst_1", "ncells"],
                        STAT='MIN', STAT_FIELD="rst_2")

    mainLinesCat = grs_to_shp(
        mainVector, os.path.join(workspace, mainVector + '.shp'), 'line')

    mainLinesDf = shp_to_obj(mainLinesCat)

    resultDf = join_dfs(
        mainLinesDf, fTable, "cat", "rst_1", onlyCombinations=None)

    resultDf.rename(columns={"rst_2" : joinCol}, inplace=True)

    resultDf = df_to_geodf(resultDf, "geometry", epsg)

    df_to_shp(resultDf, outfile)

    return outfile
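# Usage sketch (illustrative only): hypothetical line networks; mainId
# and joinCol should be numeric columns, since both are used as raster
# category values during the overlay.
def _example_joinLines_by_spatial_rel_raster():
    return joinLines_by_spatial_rel_raster(
        '/tmp/network_osm.shp', 'osmid',
        '/tmp/network_official.shp', 'roadid',
        '/tmp/network_joined.shp', 3763
    )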
def closest_facility(incidents, incidents_id, facilities, output,
                     impedance='TravelTime'):
    """
    impedance options:
    * TravelTime;
    * WalkTime;
    * metric;
    """

    import requests
    import pandas as pd
    from glass.cons.esri   import rest_token, CF_URL
    from glass.g.it.esri   import json_to_gjson
    from glass.g.rd.shp    import shp_to_obj
    from glass.g.wt.shp    import df_to_shp
    from glass.ng.pd.split import df_split
    from glass.ng.pd       import merge_df
    from glass.g.prop.prj  import get_shp_epsg
    from glass.g.prj.obj   import df_prj
    from glass.g.it.pd     import df_to_geodf, json_obj_to_geodf
    from glass.cons.esri   import get_tv_by_impedancetype

    # Get API token
    token = rest_token()

    # Data to Pandas DataFrames
    fdf = shp_to_obj(facilities)
    idf = shp_to_obj(incidents)

    # Re-project to WGS84
    fdf = df_prj(fdf, 4326)
    idf = df_prj(idf, 4326)

    # Geometries to str - inputs for requests
    fdf['coords'] = fdf.geometry.x.astype(str) + ',' + fdf.geometry.y.astype(str)
    idf['coords'] = idf.geometry.x.astype(str) + ',' + idf.geometry.y.astype(str)

    # Delete geometry from incidents DF
    idf.drop(['geometry'], axis=1, inplace=True)

    # Split data
    # The ArcGIS API only accepts 100 facilities
    # and 100 incidents in each request
    fdfs = df_split(fdf, 100, nrows=True) if fdf.shape[0] > 100 else [fdf]
    idfs = df_split(idf, 100, nrows=True) if idf.shape[0] > 100 else [idf]

    for i in range(len(idfs)):
        idfs[i].reset_index(inplace=True)
        idfs[i].drop(['index'], axis=1, inplace=True)

    for i in range(len(fdfs)):
        fdfs[i].reset_index(inplace=True)
        fdfs[i].drop(['index'], axis=1, inplace=True)

    # Get travel mode
    tv = get_tv_by_impedancetype(impedance)

    # Ask for results
    results = []

    drop_cols = [
        'ObjectID', 'FacilityID', 'FacilityRank', 'Name',
        'IncidentCurbApproach', 'FacilityCurbApproach', 'IncidentID',
        'StartTime', 'EndTime', 'StartTimeUTC', 'EndTimeUTC',
        'Total_Minutes', 'Total_TruckMinutes', 'Total_TruckTravelTime',
        'Total_Miles'
    ]

    if impedance == 'WalkTime':
        tv_col = 'walktime'
        rn_cols = {'Total_WalkTime' : tv_col}
        ndrop = ['Total_Kilometers', 'Total_TravelTime', 'Total_Minutes']

    elif impedance == 'metric':
        tv_col = 'kilomts'
        rn_cols = {'Total_Kilometers' : tv_col}
        ndrop = ['Total_WalkTime', 'Total_TravelTime', 'Total_Minutes']

    else:
        tv_col = 'traveltime'
        rn_cols = {'Total_TravelTime' : tv_col}
        ndrop = ['Total_Kilometers', 'Total_WalkTime', 'Total_Minutes']

    drop_cols.extend(ndrop)

    for i_df in idfs:
        incidents_str = i_df.coords.str.cat(sep=';')

        for f_df in fdfs:
            facilities_str = f_df.coords.str.cat(sep=';')

            # Make request
            r = requests.get(CF_URL, params={
                'facilities'                 : facilities_str,
                'incidents'                  : incidents_str,
                'token'                      : token,
                'f'                          : 'json',
                'travelModel'                : tv,
                'defaultTargetFacilityCount' : '1',
                'returnCFRoutes'             : True,
                'travelDirection'            : 'esriNATravelDirectionToFacility',
                'impedanceAttributeName'     : impedance
            })

            if r.status_code != 200:
                raise ValueError(
                    'Error when requesting from: {}'.format(str(r.url)))

            # Convert ESRI JSON to GeoJSON
            esri_geom = r.json()
            geom = json_to_gjson(esri_geom.get('routes'))

            # GeoJSON to GeoDataFrame
            gdf = json_obj_to_geodf(geom, 4326)

            # Delete unwanted columns
            gdf.drop(drop_cols, axis=1, inplace=True)

            # Rename some columns of interest
            gdf.rename(columns=rn_cols, inplace=True)

            # Add the original attributes of the incidents to the results
            r_df = gdf.merge(i_df, how='left',
                             left_index=True, right_index=True)

            results.append(r_df)

    # Compute final result
    # Put every DataFrame in a single DataFrame
    fgdf = merge_df(results)

    # Since the facilities were divided, the same incident has several
    # "nearest" facilities; we only want the nearest one.
    # Group by incident using the min operator
    gpdf = pd.DataFrame(fgdf.groupby([incidents_id]).agg({
        tv_col : 'min'
    })).reset_index()

    gpdf.rename(columns={
        incidents_id : 'iid', tv_col : 'impcol'
    }, inplace=True)

    # Recover geometry
    fgdf = fgdf.merge(gpdf, how='left', left_on=incidents_id, right_on='iid')
    fgdf = fgdf[fgdf[tv_col] == fgdf.impcol]
    fgdf = df_to_geodf(fgdf, 'geometry', 4326)

    # Remove repeated rows (keep the first per incident)
    g = fgdf.groupby('iid')
    fgdf['rn'] = g[tv_col].rank(method='first')
    fgdf = fgdf[fgdf.rn == 1]
    fgdf.drop(['iid', 'rn'], axis=1, inplace=True)

    # Re-project to the original SRS
    epsg = get_shp_epsg(facilities)
    fgdf = df_prj(fgdf, epsg)

    # Export result
    df_to_shp(fgdf, output)

    return output
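# Usage sketch (illustrative only): hypothetical inputs; a valid ArcGIS
# REST token (see glass.cons.esri.rest_token) is required for the
# requests made by closest_facility.
def _example_closest_facility():
    return closest_facility(
        '/tmp/incidents.shp', 'iid',    # hypothetical incidents layer and ID column
        '/tmp/hospitals.shp',           # hypothetical facilities layer
        '/tmp/nearest_hospital.shp',
        impedance='TravelTime'
    )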