def dfcolstorows(inDf, colField, valField, colFid=None):
    """
    Dataframe like:

        | pop_res |    ind2 | ind3 | id_unit
    0   |     571 | 35.0975 |  123 |    3768
    1   |     938 | 18.2114 |  265 |    3618
    2   |     554 | 44.3149 |   76 |    3788
    3   |     711 | 37.8619 |  134 |    3766
    4   |    1268 | 46.0733 |  203 |    3690

    To:

        | colField | valField
    0   | pop_res  |     571
    1   | ind2     | 35.0975
    2   | ind3     |     123
    3   | id_unit  |    3768
    4   | pop_res  |     938
    5   | ind2     | 18.2114
    6   | ind3     |     265
    7   | id_unit  |    3618
    """

    from gasp.pyt.df.to import merge_df

    newDfs = []
    cols = list(inDf.columns.values)

    if colFid and colFid in cols:
        cols.remove(colFid)

    for col in cols:
        ndf = inDf.copy()

        ndf.drop([c for c in cols if c != col], axis=1, inplace=True)

        ndf[colField] = col

        ndf.rename(columns={col : valField}, inplace=True)

        newDfs.append(ndf)

    res = merge_df(newDfs)

    return res
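
# Usage sketch for dfcolstorows, mirroring the docstring example above. The
# column names and sample values are hypothetical; it assumes pandas and a
# working gasp install (for merge_df).
def _demo_dfcolstorows():
    import pandas as pd

    df = pd.DataFrame({
        'pop_res' : [571, 938], 'ind2'    : [35.0975, 18.2114],
        'ind3'    : [123, 265], 'id_unit' : [3768, 3618]
    })

    # Every column becomes a (colField, valField) pair; pass
    # colFid='id_unit' instead to keep id_unit as a regular column
    return dfcolstorows(df, 'colField', 'valField')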
def same_attr_to_shp(inShps, interestCol, outFolder, basename="data_",
                     resultDict=None):
    """
    For several SHPs with the same field, this program will list all values
    in that field and will create a new SHP for each value, with the
    respective geometries, regardless of the origin SHP.
    """

    import os
    from gasp.pyt       import obj_to_lst
    from gasp.gt.fmshp  import shp_to_obj
    from gasp.pyt.df.to import merge_df
    from gasp.gt.toshp  import df_to_shp

    # Accept a single SHP path or a list of paths
    inShps = obj_to_lst(inShps)

    EXT = os.path.splitext(inShps[0])[1]

    shpDfs = [shp_to_obj(shp) for shp in inShps]

    DF = merge_df(shpDfs, ignIndex=True)
    #DF.dropna(axis=0, how='any', inplace=True)

    uniqueVal = DF[interestCol].unique()

    nShps = [] if not resultDict else {}
    for val in uniqueVal:
        ndf = DF[DF[interestCol] == val]

        KEY = str(val).split('.')[0] if '.' in str(val) else str(val)

        nshp = df_to_shp(ndf, os.path.join(
            outFolder, '{}{}{}'.format(basename, KEY, EXT)
        ))

        if not resultDict:
            nShps.append(nshp)
        else:
            nShps[KEY] = nshp

    return nShps
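
# Usage sketch for same_attr_to_shp. The paths and the 'landuse' column are
# hypothetical; it assumes the input shapefiles all share that column.
def _demo_same_attr_to_shp():
    shps = ['/tmp/parcels_a.shp', '/tmp/parcels_b.shp']

    # One shapefile per distinct 'landuse' value, e.g. /tmp/out/lu_forest.shp;
    # with resultDict=True the result maps each value to its new shapefile
    return same_attr_to_shp(shps, 'landuse', '/tmp/out',
                            basename='lu_', resultDict=True)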
def lst_prod_by_cell_and_year(shp, id_col, year, outshp,
                              platform="Sentinel-2",
                              processingl='Level-2A', epsg=32629):
    """
    Get a list of images:
    * one for each cell in the shp grid;
    * one for each month of one year - the chosen image will be the one
    with the least cloud cover;

    total_images = number_of_cells * number_of_months
    """

    from gasp.gt.fmshp  import shp_to_obj
    from gasp.pyt.df.to import merge_df
    from gasp.gt.toshp  import df_to_shp
    from gasp.g.to      import df_to_geodf
    # NOTE: the import path for lst_prod is assumed; point it to wherever
    # lst_prod is defined in your gasp build
    from gasp.sde.dsn.sat import lst_prod

    # Last day of each month (leap years are ignored)
    months = {
        '01' : '31', '02' : '28', '03' : '31',
        '04' : '30', '05' : '31', '06' : '30',
        '07' : '31', '08' : '31', '09' : '30',
        '10' : '31', '11' : '30', '12' : '31'
    }

    # Open SHP
    grid = shp_to_obj(shp, srs_to=4326)

    def get_grid_id(row):
        # Extract the MGRS tile code from the product title
        # (e.g. ..._T29TNE_... -> '29TNE')
        row['cellid'] = row.title.split('_')[5][1:]

        return row

    # Search for images
    dfs = []
    for idx, cell in grid.iterrows():
        for k in months:
            start = "{}{}01".format(str(year), k)
            end   = "{}{}{}".format(str(year), k, months[k])

            # In January and February 2018 the products were published
            # with the pilot processing level 'Level-2Ap'
            if year == 2018 and processingl == 'Level-2A':
                if k == '01' or k == '02':
                    plevel = 'Level-2Ap'
                else:
                    plevel = processingl
            else:
                plevel = processingl

            prod = lst_prod(
                cell.geometry.wkt, start, end,
                platname=platform, procLevel=plevel
            )

            if not prod.shape[0]:
                continue

            # Get area in km2
            prod = prod.to_crs(epsg=epsg)
            prod['areav'] = prod.geometry.area / 1000000

            # We want only images with more than 70% of data
            # (>= 7000 km2 of a full 100x100 km tile)
            prod = prod[prod.areav >= 7000]

            # Get Cell ID
            prod = prod.apply(lambda x: get_grid_id(x), axis=1)
            # Filter by Cell ID
            prod = prod[prod.cellid == cell[id_col]]

            # Sort by cloud cover and date
            prod = prod.sort_values(
                ['cloudcoverpercentage', 'ingestiondate'],
                ascending=[True, True]
            )

            # Get only the image with less cloud cover
            prod = prod.head(1)

            dfs.append(prod)

    fdf = merge_df(dfs)
    fdf = df_to_geodf(fdf, 'geometry', epsg)

    df_to_shp(fdf, outshp)

    return outshp
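
# Usage sketch for lst_prod_by_cell_and_year. The grid shapefile, its ID
# column and the output path are hypothetical; it assumes the credentials
# required by lst_prod are configured.
def _demo_lst_prod_by_cell_and_year():
    # One Sentinel-2 Level-2A image per cell and per month of 2019,
    # picking the least cloudy product in each case
    return lst_prod_by_cell_and_year(
        '/tmp/s2_grid.shp', 'cellid', 2019, '/tmp/s2_selection.shp',
        platform="Sentinel-2", processingl='Level-2A', epsg=32629
    )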
def dsnsearch_by_cell(GRID_PNT, EPSG, RADIUS, DATA_SOURCE, db, OUTPUT_TABLE):
    """
    Search for data in DSN and other platforms by cell
    """

    import time

    from gasp.gt.fmshp          import shp_to_obj
    from gasp.sql.db            import create_db
    from gasp.sde.dsn.fb.places import places_by_query
    from gasp.g.prj             import df_prj
    from gasp.pyt.df.to         import merge_df
    from gasp.gt.toshp.db       import dbtbl_to_shp
    from gasp.sql.to            import q_to_ntbl, df_to_db

    # Open GRID SHP
    GRID_DF = shp_to_obj(GRID_PNT)
    GRID_DF = df_prj(GRID_DF, 4326) if EPSG != 4326 else GRID_DF

    GRID_DF["lng"]     = GRID_DF.geometry.x.astype(float)
    GRID_DF["lat"]     = GRID_DF.geometry.y.astype(float)
    GRID_DF["grid_id"] = GRID_DF.index

    # GET DATA
    RESULTS = []

    def get_data(row, datasrc):
        if datasrc == 'facebook':
            d = places_by_query(
                {'x' : row.lng, 'y' : row.lat, 'r' : RADIUS}, 4326,
                keyword=None, epsgOut=EPSG, _limit='100',
                onlySearchAreaContained=None
            )
        else:
            raise ValueError(
                '{} as datasource is not a valid value'.format(datasrc))

        if type(d) == int:
            return

        d['grid_id'] = row.grid_id

        RESULTS.append(d)

        time.sleep(5)

    GRID_DF.apply(lambda x: get_data(x, DATA_SOURCE), axis=1)

    RT = merge_df(RESULTS)

    # Create DB
    create_db(db, overwrite=True, api='psql')

    # Send Data to PostgreSQL
    df_to_db(
        db, RT, "{}_data".format(DATA_SOURCE), EPSG, "POINT",
        colGeom='geometry' if 'geometry' in RT.columns.values else 'geom'
    )

    COLS = [
        x for x in RT.columns.values if x != "geometry" and \
        x != 'geom' and x != "grid_id"
    ] + ["geom"]

    GRP_BY_TBL = q_to_ntbl(db, "{}_grpby".format(DATA_SOURCE), (
        "SELECT {cols}, CAST(array_agg(grid_id) AS text) AS grid_id "
        "FROM {dtsrc}_data GROUP BY {cols}"
    ).format(cols=", ".join(COLS), dtsrc=DATA_SOURCE), api='psql')

    dbtbl_to_shp(
        db, GRP_BY_TBL, "geom", OUTPUT_TABLE, api="psql", epsg=EPSG
    )

    return OUTPUT_TABLE
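
# Usage sketch for dsnsearch_by_cell. Paths, SRS, radius and database name
# are hypothetical; it assumes a reachable PostgreSQL/PostGIS server and
# Facebook API credentials configured for places_by_query.
def _demo_dsnsearch_by_cell():
    # Query Facebook places within 500 m of each grid point and export the
    # grouped results (EPSG:3763 coordinates) to a shapefile
    return dsnsearch_by_cell(
        '/tmp/grid_pnt.shp', 3763, 500, 'facebook', 'dsn_search',
        '/tmp/fb_places.shp'
    )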
def df_cols_to_rows(inDf, TO_COLS, col_old_col_name, key_old_col_name,
                    col_mantain):
    """
    Dataframe like:

      | pop_res |    ind2 | ind3 |    ind5 | id_unit | pop_res_int | ind2_int | ind3_int | ind5_int
    0 |     571 | 35.0975 |  123 |  97.373 |    3768 |           2 |        6 |        2 |        7
    1 |     938 | 18.2114 |  265 | 93.4968 |    3618 |           3 |        1 |        5 |        4
    2 |     554 | 44.3149 |   76 | 97.4074 |    3788 |           1 |        7 |        1 |        7
    3 |     711 | 37.8619 |  134 | 96.1429 |    3766 |           2 |        6 |        3 |        6
    4 |    1268 | 46.0733 |  203 | 90.9385 |    3690 |           5 |        7 |        4 |        3

    To:

      | id_unit | id_indicator |   value | cls
    2 |    3768 | pop_res      |     571 |   2
    3 |    3768 | ind2         | 35.0975 |   6
    4 |    3768 | ind3         |     123 |   2
    5 |    3768 | ind5         |  97.373 |   7
    6 |    3618 | pop_res      |     938 |   3
    7 |    3618 | ind2         | 18.2114 |   1
    8 |    3618 | ind3         |     265 |   5
    9 |    3618 | ind5         | 93.4968 |   4
    ...

    Using as parameters:
    data_cols = ['pop_res', 'ind2', 'ind3', 'ind5']
    col_mantain = 'id_unit'
    TO_COLS = {
        # Dict values should have the same length
        'value' : data_cols,
        'cls'   : [i + '_int' for i in data_cols]
    }
    col_old_col_name = 'id_indicator'
    key_old_col_name = 'value'
    """

    from gasp.pyt       import obj_to_lst
    from gasp.pyt.df.to import merge_df

    col_mantain = obj_to_lst(col_mantain)
    newCols = list(TO_COLS.keys())

    newDfs = []
    for i in range(len(TO_COLS[newCols[0]])):
        ndf = inDf.copy(deep=True)

        # Keep the maintained columns plus the i-th column of each group
        DROP_COLS = []
        COLS_MANT = col_mantain.copy()

        for K in TO_COLS:
            COLS_MANT.append(TO_COLS[K][i])

        for col in ndf.columns.values:
            if col not in COLS_MANT:
                DROP_COLS.append(col)

        ndf.drop(DROP_COLS, axis=1, inplace=True)
        ndf.rename(columns={TO_COLS[k][i] : k for k in TO_COLS}, inplace=True)

        ndf[col_old_col_name] = TO_COLS[key_old_col_name][i]

        newDfs.append(ndf)

    outDf = merge_df(newDfs)

    return outDf
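
# Usage sketch for df_cols_to_rows, mirroring the docstring example. The
# sample dataframe is hypothetical; it assumes pandas and a working gasp
# install (for obj_to_lst and merge_df).
def _demo_df_cols_to_rows():
    import pandas as pd

    data_cols = ['pop_res', 'ind2']
    df = pd.DataFrame({
        'pop_res'     : [571, 938], 'ind2'     : [35.0975, 18.2114],
        'id_unit'     : [3768, 3618],
        'pop_res_int' : [2, 3],     'ind2_int' : [6, 1]
    })

    # Pair each value column with its *_int class column; every input row
    # yields one output row per entry in data_cols
    return df_cols_to_rows(
        df, {'value' : data_cols, 'cls' : [c + '_int' for c in data_cols]},
        'id_indicator', 'value', 'id_unit'
    )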