def txts_to_db(folder, db, delimiter, __encoding='utf-8', apidb='psql',
               rewrite=None):
    """
    Executes tbl_to_db for every file in a given folder

    The file name will be the table name

    Parameters:
    * folder - directory searched for '.txt', '.csv' and '.tsv' files;
    * db - name of the target database; it is created when db_exists
    reports that it does not exist;
    * delimiter - field delimiter forwarded to tbl_to_db;
    * __encoding - text encoding forwarded to tbl_to_db (encoding_);
    * apidb - database API forwarded to create_db and tbl_to_db;
    * rewrite - when true and the database already exists, create_db is
    called again with overwrite=True before the files are loaded.
    """

    from gasp.pyt.oss import lst_ff, fprop
    from gasp.sql.i import db_exists
    from gasp.sql.db import create_db

    if not db_exists(db):
        # Create database
        create_db(db, api=apidb, overwrite=None)
    elif rewrite:
        # The api argument is the database API (apidb), never the
        # database name
        create_db(db, api=apidb, overwrite=True)

    # Send data to DB using Pandas - one table per file
    for __file in lst_ff(folder, file_format=['.txt', '.csv', '.tsv']):
        tbl_to_db(
            __file, db, fprop(__file, 'fn'),
            delimiter=delimiter, encoding_=__encoding, api_db=apidb
        )
def osm_to_psql(osmXml, osmdb):
    """
    Use GDAL to import osmfile into PostGIS database

    The database osmdb is created first when db_exists reports that it
    does not exist. HOST, PORT, USER and PASSWORD are read from the
    dict returned by con_psql().

    Returns osmdb (the database name received).
    """

    from gasp import exec_cmd
    from gasp.cons.psql import con_psql
    from gasp.sql.i import db_exists

    if not db_exists(osmdb):
        from gasp.sql.db import create_db
        create_db(osmdb, api='psql')

    con = con_psql()

    # One placeholder per argument, in this order:
    # database, host, port, user, password, input file.
    # NOTE(review): the GDAL PostgreSQL driver documents the layer
    # creation option as COLUMN_TYPES; the spelling below is kept as
    # found - confirm before changing (hstore must exist in the database).
    cmd = (
        "ogr2ogr -f PostgreSQL \"PG:dbname='{}' host='{}' port='{}' "
        "user='{}' password='{}'\" {} -lco COLUM_TYPES=other_tags=hstore"
    ).format(
        osmdb, con["HOST"], con["PORT"],
        con["USER"], con["PASSWORD"], osmXml
    )

    exec_cmd(cmd)

    return osmdb
def break_lines_on_points(lineShp, pntShp, outShp, lnhidonpnt,
                          api='shply', db=None):
    """
    Break lines on points location

    Values accepted by api:
    - shply (shapely);
    - psql (postgis).

    With api='psql' both inputs are sent to the database db (named
    after lineShp when db is not given, created when it does not
    exist), the split is computed there and the resulting table is
    exported to outShp.

    Raises ValueError for any other api value.
    """

    if api not in ('shply', 'psql'):
        raise ValueError(
            "API {} is not available".format(api)
        )

    if api == 'shply':
        return shply_break_lines_on_points(
            lineShp, pntShp, lnhidonpnt, outShp)

    # From here on: PostGIS workflow
    from gasp.pyt.oss import fprop
    from gasp.sql.db import create_db
    from gasp.gql.to import shp_to_psql
    from gasp.gt.toshp.db import dbtbl_to_shp
    from gasp.gql.brk import split_lines_on_pnt

    # Make sure there is a database to work with
    if db:
        from gasp.sql.i import db_exists

        if not db_exists(db):
            db = create_db(db, api='psql')
    else:
        db = create_db(fprop(lineShp, 'fn', forceLower=True), api='psql')

    # Load lines and points
    line_table = shp_to_psql(db, lineShp, api="shp2pgsql")
    point_table = shp_to_psql(db, pntShp, api="shp2pgsql")

    # Split lines; the output table is named after outShp
    out_name = fprop(outShp, 'fn', forceLower=True)
    split_table = split_lines_on_pnt(
        db, line_table, point_table, out_name, lnhidonpnt, 'gid'
    )

    # Write the table to outShp
    return dbtbl_to_shp(
        db, split_table, "geom", outShp,
        inDB='psql', tableIsQuery=None, api="pgsql2shp"
    )
def db_to_db(db_a, db_b, typeDBA, typeDBB):
    """
    Copy all tables of database db_a into database db_b

    Useful to migrate a SQLITE DB to a PostgreSQL DB.

    typeDBA / typeDBB options:
    * sqlite
    * psql

    Views are excluded from the table listing. db_b is created with
    create_db(..., overwrite=False). Nothing is returned.
    """

    import os
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i import lst_tbl
    from gasp.sql.db import create_db

    # Tables available in the source database
    source_tables = lst_tbl(db_a, excludeViews=True, api=typeDBA)

    # Target database
    db_b = create_db(db_b, overwrite=False, api=typeDBB)

    # Read each table as a whole and write it to the target
    for table_name in source_tables:
        query = "SELECT * FROM {}".format(table_name)
        table_df = q_to_obj(db_a, query, db_api=typeDBA)

        df_to_db(db_b, table_df, table_name, append=None, api=typeDBB)
def matrix_od_mean_dist_by_group(MATRIX_OD, ORIGIN_COL, GROUP_ORIGIN_ID,
                                 GROUP_ORIGIN_NAME, GROUP_DESTINA_ID,
                                 GROUP_DESTINA_NAME, TIME_COL, epsg, db,
                                 RESULT_MATRIX):
    """
    Calculate Mean GROUP distance from OD Matrix

    OD MATRIX EXAMPLE
    | origin_entity | origin_group | destina_entity | destina_group | distance
    |     XXXX      |     XXXX     |      XXXX      |      XXX      |   XXX

    OUTPUT EXAMPLE
    | origin_group | destina_group | mean_distance
    |     XXXX     |     XXXX      |      XXXX

    TIME_COL is first averaged per origin and group pair, then those
    averages are averaged per group pair. The result is written to
    RESULT_MATRIX by db_to_tbl, whose return value is returned.

    NOTE: the db argument is not used; the database is always
    (re)created with the file name of MATRIX_OD and overwrite=True.
    """

    import os
    from gasp.pyt.oss import fprop
    from gasp.gql.to import shp_to_psql
    from gasp.sql.db import create_db
    from gasp.sql.to import q_to_ntbl
    from gasp.to import db_to_tbl

    # Fresh database named after the input matrix
    db = create_db(fprop(MATRIX_OD, 'fn'), overwrite=True, api='psql')

    # Matrix to database
    TABLE = shp_to_psql(
        db, MATRIX_OD, pgTable="tbl_{}".format(db),
        api="pandas", srsEpsgCode=epsg
    )

    result_name = fprop(RESULT_MATRIX, 'fn')

    # Inner query: mean by origin; outer query: mean by pair of groups
    sql_template = (
        "SELECT {groupOriginCod}, {groupOriginName}, {groupDestCod}, "
        "{groupDestName}, AVG(mean_time) AS mean_time FROM ("
            "SELECT {origin}, {groupOriginCod}, {groupOriginName}, "
            "{groupDestCod}, {groupDestName}, "
            "AVG({timeCol}) AS mean_time FROM {t} "
            "GROUP BY {origin}, {groupOriginCod}, {groupOriginName}, "
            "{groupDestCod}, {groupDestName}"
        ") AS foo "
        "GROUP BY {groupOriginCod}, {groupOriginName}, "
        "{groupDestCod}, {groupDestName} "
        "ORDER BY {groupOriginCod}, {groupDestCod}"
    )

    mean_query = sql_template.format(
        groupOriginCod=GROUP_ORIGIN_ID,
        groupOriginName=GROUP_ORIGIN_NAME,
        groupDestCod=GROUP_DESTINA_ID,
        groupDestName=GROUP_DESTINA_NAME,
        origin=ORIGIN_COL, timeCol=TIME_COL, t=TABLE
    )

    OUT_TABLE = q_to_ntbl(db, result_name, mean_query, api='psql')

    # Export the new table
    export_query = "SELECT * FROM {}".format(OUT_TABLE)

    return db_to_tbl(
        db, export_query, RESULT_MATRIX,
        sheetsNames="matrix", dbAPI='psql'
    )
def line_intersect_to_pnt(inShp, outShp, db=None):
    """
    Get the points where two line features of the same feature class
    intersect.

    inShp is sent to the database db (named after inShp when db is not
    given; created when it does not exist), the intersection points are
    computed there and exported to outShp.

    Returns the value returned by dbtbl_to_shp.
    """

    from gasp.pyt.oss import fprop
    from gasp.gt.toshp.db import dbtbl_to_shp
    from gasp.sql.db import create_db
    from gasp.gql.to import shp_to_psql
    from gasp.gql.ovly import line_intersection_pnt

    # Database: use the given one (creating it if missing) or a new one
    if db:
        from gasp.sql.i import db_exists

        if not db_exists(db):
            create_db(db, api='psql')
    else:
        db = create_db(fprop(inShp, 'fn', forceLower=True), api='psql')

    # Lines to database
    lines_table = shp_to_psql(db, inShp, api="shp2pgsql")

    # Intersection points; table named after the output file
    points_table = line_intersection_pnt(
        db, lines_table, fprop(outShp, 'fn', forceLower=True))

    # Table to file
    return dbtbl_to_shp(
        db, points_table, "geom", outShp,
        inDB='psql', tableIsQuery=None, api="pgsql2shp"
    )
def merge_dbs(destinationDb, dbs, tbls_to_merge=None, ignoreCols=None):
    """
    Put several database into one

    For now works only with PostgreSQL

    Parameters:
    * destinationDb - name of the destination database (created when it
    does not exist) or path to a '.sql' script, in which case a database
    named after the file is created (overwrite=True) and the script is
    run on it with psql_cmd;
    * dbs - list (or tuple) of paths to SQL files, or a directory with
    '.sql' files;
    * tbls_to_merge - tables to merge; when not given, every table
    listed by lst_tbl (views excluded) is used;
    * ignoreCols - table names to leave out when tbls_to_merge is not
    given (despite the name, it is compared against table names).

    For every SQL file a temporary database named after the file is
    created; its tables are renamed to '<table>_<i>', dumped next to
    the SQL file, restored in the destination and the temporary database
    and dump are deleted. At the end, for each table name, the original
    destination table and all '<table>_<i>' tables are united and
    reduced with distinct_to_table.

    Returns the name of the destination database.

    Raises ValueError if destinationDb is a file that is not '.sql', or
    if dbs is neither a list/tuple nor a directory.
    """

    import os
    from gasp.pyt.oss import fprop, del_file
    from gasp.sql import psql_cmd
    from gasp.sql.i import db_exists, lst_tbl
    from gasp.sql.db import create_db, drop_db
    from gasp.sql.tbl import rename_tbl, tbls_to_tbl
    from gasp.sql.fm import dump_tbls
    from gasp.sql.to import restore_tbls
    from gasp.sql.tbl import distinct_to_table, del_tables

    # Prepare database
    fdb = fprop(destinationDb, ['fn', 'ff'])

    if os.path.isfile(destinationDb):
        if fdb['fileformat'] != '.sql':
            raise ValueError((
                'destinationDb is a file but is not correct. The file must be'
                ' a SQL Script'
            ))

        newdb = create_db(fdb['filename'], overwrite=True, api='psql')
        psql_cmd(newdb, destinationDb)

        destinationDb = newdb

    elif not db_exists(destinationDb):
        # Destination does not exist yet
        create_db(destinationDb, overwrite=None, api='psql')

    # Check if dbs is a list or a dir
    if isinstance(dbs, (list, tuple)):
        dbs = list(dbs)

    elif os.path.isdir(dbs):
        # list SQL files
        from gasp.pyt.oss import lst_ff

        dbs = lst_ff(dbs, file_format='.sql')

    else:
        raise ValueError(
            ' dbs value should be a list with paths to sql files or '
            'a dir with sql files inside '
        )

    # ignoreCols defaults to None; a membership test against None raises
    ignored = ignoreCols if ignoreCols is not None else []

    # Original table name -> names of its renamed copies
    TABLES = {}

    for i, sql_file in enumerate(dbs):
        # Create DB
        DB_NAME = fprop(sql_file, 'fn')
        create_db(DB_NAME, overwrite=True, api='psql')

        # Restore DB
        psql_cmd(DB_NAME, sql_file)

        # List Tables
        if not tbls_to_merge:
            tbls = [
                t for t in lst_tbl(DB_NAME, excludeViews=True, api='psql')
                if t not in ignored
            ]
        else:
            tbls = tbls_to_merge

        # Rename Tables - the suffix keeps copies from different files apart
        renamed = {tbl: "{}_{}".format(tbl, str(i)) for tbl in tbls}
        newTbls = rename_tbl(DB_NAME, renamed)

        for tbl in tbls:
            TABLES.setdefault(tbl, []).append(renamed[tbl])

        # Dump Tables
        SQL_DUMP = os.path.join(
            os.path.dirname(sql_file), 'tbl_{}.sql'.format(DB_NAME)
        )
        dump_tbls(DB_NAME, newTbls, SQL_DUMP)

        # Restore Tables in the destination Database
        restore_tbls(destinationDb, SQL_DUMP, newTbls)

        # Delete Temp Database
        drop_db(DB_NAME)

        # Delete SQL File
        del_file(SQL_DUMP)

    # Nothing was restored (no SQL files or no tables): nothing to unite
    if not TABLES:
        return destinationDb

    # Union of all tables
    # max_len is an index no renamed copy uses (copies go from 0 to
    # len - 1), so it is a free suffix for the original table
    max_len = max(len(copies) for copies in TABLES.values())

    for tbl, copies in TABLES.items():
        # Rename original table
        NEW_TBL = "{}_{}".format(tbl, max_len)
        rename_tbl(destinationDb, {tbl: NEW_TBL})

        copies.append(NEW_TBL)

        # Union
        tbls_to_tbl(destinationDb, copies, tbl + '_tmp')

        # Group By
        distinct_to_table(destinationDb, tbl + '_tmp', tbl, cols=None)

        # Drop unwanted tables
        del_tables(destinationDb, copies + [tbl + '_tmp'])

    return destinationDb
def get_stop_words(inTbl, fidCol, txtCol, outFile, lang='portuguese',
                   inSheet=None, db=None):
    """
    Pick a text column and save it in a new column only with the stop
    words.

    Uses PostgreSQL dictionaries to get stop words

    Parameters:
    * inTbl - table file sent to PostgreSQL with tbl_to_db;
    * fidCol - column with the row identifier;
    * txtCol - column with the text to process;
    * outFile - output file written by db_to_tbl;
    * lang - text search configuration given to to_tsvector;
    * inSheet - sheet of inTbl; also used as sheet name of the output;
    * db - database name; when not given, "db_" + file name of inTbl
    is used. create_db is called in both cases.

    The new column is named "clean_" + txtCol and is built by joining,
    ordered by position, the words found in the to_tsvector output.
    NOTE(review): to_tsvector is expected to drop stop words, so the
    summary above may describe the opposite of the result - confirm.

    Returns the value returned by db_to_tbl.
    """

    from gasp.pyt.oss import fprop
    from gasp.sql.i import cols_name
    from gasp.sql.db import create_db
    from gasp.sql.to import tbl_to_db
    from gasp.to import db_to_tbl

    FILENAME = fprop(inTbl, 'fn')

    # Create Temp database
    db = create_db("db_" + FILENAME if not db else db)

    # Send table to PostgreSQL
    tbl = tbl_to_db(inTbl, db, FILENAME, sheet=inSheet, api_db='psql')

    cols = cols_name(db, tbl, sanitizeSpecialWords=None, api='psql')

    # Sanitize data and create a new column only with stop words
    # Raw literals: \s, \S and \w are regex classes for PostgreSQL, not
    # Python escapes; the resulting text is the same
    Q1 = (
        "(SELECT *, to_tsvector('{_lang}', regexp_replace("
        r"regexp_replace(lower(unaccent({txt_c})), 'http://[^:\s]+(\S+)', "
        r"' ', 'g'), '[^\w]+', ' ', 'g')) "
        "AS txt_data FROM {t}) AS stop_table"
    ).format(_lang=lang, txt_c=txtCol, t=tbl)

    # Each unnested tsvector entry is cast to text and split into the
    # word and its list of positions; Q1 enters as a subquery ({tbl})
    Q2 = (
        "SELECT {selCols}, ARRAY_TO_STRING(array_agg("
        "word ORDER BY word_index), ' ', '*') AS {outCol}, "
        "REPLACE(CAST(STRIP("
        "stop_table.txt_data) AS text), '''', '') AS no_duplicated "
        "FROM ("
            "SELECT fid, word, CAST(UNNEST(word_index) AS integer) "
            "AS word_index FROM ("
                "SELECT fid, SPLIT_PART(tst, ';', 1) AS word, "
                "STRING_TO_ARRAY(SPLIT_PART(tst, ';', 2), ',') AS word_index FROM ("
                    "SELECT {fid} AS fid, REPLACE(REPLACE(REPLACE(REPLACE(REPLACE("
                    "CAST(UNNEST(txt_data) AS text), "
                    "',{{', ',\"{{'), ',\"{{', ';'), '}}\"', ''), "
                    "'(', ''), '}}', '') AS tst "
                    "FROM {tbl}"
                ") AS foo"
            ") AS foo2"
        ") AS foo3 INNER JOIN {tbl} ON foo3.fid = stop_table.{fid} "
        "GROUP BY {selCols}, stop_table.txt_data"
    ).format(
        outCol="clean_" + txtCol, tbl=Q1, fid=fidCol,
        selCols=", ".join(["stop_table.{}".format(i) for i in cols])
    )

    # Export new table
    return db_to_tbl(db, Q2, outFile, sheetsNames=inSheet)
# Create ZIP Table ("CREATE TABLE zip_vistoburn AS " "SELECT rowi, coli, array_agg(pntid) AS pntid " "FROM vistoburn GROUP BY rowi, coli"), # Delete vistoburn "DROP TABLE IF EXISTS vistoburn" ] import os from gasp.sql import psql_cmd from gasp.pyt.oss import lst_ff, fprop from gasp.sql.q import exec_write_q from gasp.sql.fm import dump_db from gasp.sql.db import create_db, drop_db sqls = lst_ff(sql_fld) for sql in sqls: # Restore database new_db = create_db(fprop(sql, 'fn')) psql_cmd(new_db, sql) # Execute queries exec_write_q(new_db, QS) # Dump Database dump_db(new_db, os.path.join(outfld, os.path.basename(sql)), api='psql') # Drop Database drop_db(new_db)
def lnh_to_polygons(inShp, outShp, api='saga', db=None):
    """
    Line to Polygons

    API's Available:
    * saga;
    * grass;
    * pygrass;
    * psql;

    Parameters:
    * inShp - file with the lines;
    * outShp - file to be created with the polygons;
    * api - tool used for the conversion (see above);
    * db - database used when api is 'psql'; when not given, a database
    named after inShp is created; when given and missing, it is created.

    Returns outShp (in the GRASS case, the value returned by grs_to_shp).

    Raises ValueError when api is not one of the available options.
    """

    if api == 'saga':
        # http://www.saga-gis.org/saga_tool_doc/7.0.0/shapes_polygons_3.html
        #
        # Converts lines to polygons. Line arcs are closed to polygons
        # simply by connecting the last point with the first. Optionally
        # parts of polylines can be merged into one polygon.

        from gasp import exec_cmd

        # Every saga_cmd option takes a leading dash, -LINES included
        exec_cmd((
            "saga_cmd shapes_polygons 3 -POLYGONS {} "
            "-LINES {} -SINGLE 1 -MERGE 1"
        ).format(outShp, inShp))

    elif api == 'grass' or api == 'pygrass':
        # Do it using GRASS GIS

        import os
        from gasp.gt.wenv.grs import run_grass
        from gasp.pyt.oss import fprop

        # Create GRASS GIS Session
        wk = os.path.dirname(outShp)
        lo = fprop(outShp, 'fn', forceLower=True)

        # NOTE(review): other callers of run_grass pass the location by
        # keyword (location=...); confirm that the second positional
        # parameter is the location
        gs = run_grass(wk, lo, srs=inShp)

        import grass.script as grass
        import grass.script.setup as gsetup
        gsetup.init(gs, wk, lo, 'PERMANENT')

        # Import Packages
        from gasp.gt.toshp.cff import shp_to_grs, grs_to_shp
        from gasp.gt.toshp.cgeo import line_to_polyline
        from gasp.gt.toshp.cgeo import geomtype_to_geomtype
        from gasp.gt.toshp.cgeo import boundary_to_areas

        # 'grass' runs the tools as commands; 'pygrass' does not
        as_cmd = True if api == 'grass' else None

        # Send data to GRASS GIS
        lnh_shp = shp_to_grs(
            inShp, fprop(inShp, 'fn', forceLower=True), asCMD=as_cmd)

        # Build Polylines
        pol_lnh = line_to_polyline(lnh_shp, "polylines", asCmd=as_cmd)

        # Polyline to boundary
        bound = geomtype_to_geomtype(
            pol_lnh, 'bound_shp', 'line', 'boundary', cmd=as_cmd)

        # Boundary to Area
        areas_shp = boundary_to_areas(bound, lo, useCMD=as_cmd)

        # Export data
        outShp = grs_to_shp(areas_shp, outShp, 'area', asCMD=as_cmd)

    elif api == 'psql':
        # Do it using PostGIS

        from gasp.pyt.oss import fprop
        from gasp.sql.db import create_db
        from gasp.gql.to import shp_to_psql
        from gasp.gt.toshp.db import dbtbl_to_shp
        from gasp.gql.cnv import lnh_to_polg
        from gasp.gt.prop.prj import get_epsg_shp

        # Create DB
        if not db:
            db = create_db(fprop(inShp, 'fn', forceLower=True), api='psql')

        else:
            from gasp.sql.i import db_exists

            if not db_exists(db):
                create_db(db, api='psql')

        # Send data to DB
        in_tbl = shp_to_psql(db, inShp, api="shp2pgsql")

        # Get Result
        result = lnh_to_polg(
            db, in_tbl, fprop(outShp, 'fn', forceLower=True))

        # Export Result
        dbtbl_to_shp(
            db, result, "geom", outShp,
            api='psql', epsg=get_epsg_shp(inShp)
        )

    else:
        raise ValueError("API {} is not available".format(api))

    return outShp
def check_shape_diff(SHAPES_TO_COMPARE, OUT_FOLDER, REPORT, DB, GRASS_REGION_TEMPLATE):
    """
    Script to check differences between pairs of Feature Classes

    Suppose we have several Feature Classes (FC) and that each one of
    them has a given attribute; imagine also that, considering all the
    possible pairs between these FC, we want to compare the differences
    in the distribution of the values of that attribute for each pair.

    * Dependencies:
    - GRASS;
    - PostgreSQL;
    - PostGIS.

    Parameters:
    * SHAPES_TO_COMPARE - mapping {path to file: name of the column with
    the value to compare}. Raster files are converted to polygons; for
    them the mapped value is not read;
    * OUT_FOLDER - folder for all intermediate files. It must NOT exist
    (a ValueError is raised otherwise); it is created here and used as
    GRASS GIS workspace;
    * REPORT - output file written with db_to_tbl (one sheet per table);
    * DB - name of the PostgreSQL database created with create_db;
    * GRASS_REGION_TEMPLATE - passed as srs to run_grass and to
    optimized_union_anls.

    Returns REPORT.
    """

    import datetime
    import os
    import pandas
    from gasp.sql.fm import q_to_obj
    from gasp.to import db_to_tbl
    from gasp.sql.to import df_to_db
    from gasp.gt.toshp.cff import shp_to_shp
    from gasp.gt.toshp.db import dbtbl_to_shp
    from gasp.gt.toshp.rst import rst_to_polyg
    from gasp.gql.to import shp_to_psql
    from gasp.gql.tomtx import tbl_to_area_mtx
    from gasp.gt.prop.ff import check_isRaster
    from gasp.pyt.oss import fprop
    from gasp.sql.db import create_db
    from gasp.sql.tbl import tbls_to_tbl
    from gasp.sql.to import q_to_ntbl
    from gasp.gql.cln import fix_geom
    from gasp.to import db_to_tbl

    # Check if folder exists, if not create it
    if not os.path.exists(OUT_FOLDER):
        from gasp.pyt.oss import mkdir
        mkdir(OUT_FOLDER, overwrite=None)
    else:
        raise ValueError('{} already exists!'.format(OUT_FOLDER))

    from gasp.gt.wenv.grs import run_grass

    # Start a GRASS GIS session with location 'shpdif' in OUT_FOLDER
    gbase = run_grass(OUT_FOLDER, grassBIN='grass78', location='shpdif', srs=GRASS_REGION_TEMPLATE)

    import grass.script as grass
    import grass.script.setup as gsetup

    gsetup.init(gbase, OUT_FOLDER, 'shpdif', 'PERMANENT')

    # Imported only after the GRASS session is initialized
    from gasp.gt.toshp.cff import shp_to_grs, grs_to_shp
    from gasp.gt.torst import rst_to_grs
    from gasp.gt.tbl.fld import rn_cols

    # Convert to SHAPE if file is Raster
    # In both cases the compared column ends up named "lulc_<i>" and
    # _SHP_TO_COMPARE maps the exported shapefile to that column name
    i = 0
    _SHP_TO_COMPARE = {}
    for s in SHAPES_TO_COMPARE:
        isRaster = check_isRaster(s)

        if isRaster:
            # To GRASS
            rstName = fprop(s, 'fn')
            inRst = rst_to_grs(s, "rst_" + rstName, as_cmd=True)
            # To Vector
            d = rst_to_polyg(inRst, rstName, rstColumn="lulc_{}".format(i), gisApi="grass")

            # Export Shapefile
            shp = grs_to_shp(d, os.path.join(OUT_FOLDER, d + '.shp'), "area")

            _SHP_TO_COMPARE[shp] = "lulc_{}".format(i)

        else:
            # To GRASS
            grsV = shp_to_grs(s, fprop(s, 'fn'), asCMD=True)

            # Change name of column with comparing value
            # (ncol is assigned but not used afterwards)
            ncol = "lulc_{}".format(str(i))
            rn_cols(grsV, {SHAPES_TO_COMPARE[s]: "lulc_{}".format(str(i))}, api="grass")

            # Export
            shp = grs_to_shp(grsV, os.path.join(OUT_FOLDER, grsV + '_rn.shp'), "area")

            _SHP_TO_COMPARE[shp] = "lulc_{}".format(str(i))

        i += 1

    # From here on only the converted/renamed shapefiles are used
    SHAPES_TO_COMPARE = _SHP_TO_COMPARE
    __SHAPES_TO_COMPARE = SHAPES_TO_COMPARE

    # Create database
    create_db(DB, api='psql')

    """ Union SHAPEs """

    # UNION_SHAPE maps (shp_a, shp_b) to the union file of that pair
    # (FIX_GEOM is created but not used afterwards)
    UNION_SHAPE = {}
    FIX_GEOM = {}

    SHPS = list(__SHAPES_TO_COMPARE.keys())
    # Every unordered pair (i, e) with i < e is processed once
    for i in range(len(SHPS)):
        for e in range(i + 1, len(SHPS)):
            # Optimized Union
            print("Union between {} and {}".format(SHPS[i], SHPS[e]))
            time_a = datetime.datetime.now().replace(microsecond=0)
            __unShp = optimized_union_anls(
                SHPS[i], SHPS[e],
                os.path.join(OUT_FOLDER, "un_{}_{}.shp".format(i, e)),
                GRASS_REGION_TEMPLATE,
                os.path.join(OUT_FOLDER, "work_{}_{}".format(i, e)),
                multiProcess=True)
            time_b = datetime.datetime.now().replace(microsecond=0)
            # Elapsed time of the union
            print(time_b - time_a)

            # Rename cols
            # The union columns carry an "a_" / "b_" prefix; restore the
            # "lulc_<i>" names
            unShp = rn_cols(
                __unShp, {
                    "a_" + __SHAPES_TO_COMPARE[SHPS[i]]: __SHAPES_TO_COMPARE[SHPS[i]],
                    "b_" + __SHAPES_TO_COMPARE[SHPS[e]]: __SHAPES_TO_COMPARE[SHPS[e]]
                })

            UNION_SHAPE[(SHPS[i], SHPS[e])] = unShp

    # Send data to postgresql
    # SYNTH_TBL maps each pair to its summary ("TOTAL") and confusion
    # matrix ("MATRIX") tables
    SYNTH_TBL = {}

    for uShp in UNION_SHAPE:
        # Send data to PostgreSQL
        union_tbl = shp_to_psql(DB, UNION_SHAPE[uShp], api='shp2pgsql')

        # Produce table with % of area equal in both maps
        # Only rows where both classes are equal are summed; areas are
        # divided by 1000000 and rounded to 4 decimal places
        areaMapTbl = q_to_ntbl(
            DB, "{}_syn".format(union_tbl),
            ("SELECT CAST('{lulc_1}' AS text) AS lulc_1, "
             "CAST('{lulc_2}' AS text) AS lulc_2, "
             "round("
                 "CAST(SUM(g_area) / 1000000 AS numeric), 4"
             ") AS agree_area, round("
                 "CAST((SUM(g_area) / MIN(total_area)) * 100 AS numeric), 4"
             ") AS agree_percentage, "
             "round("
                 "CAST(MIN(total_area) / 1000000 AS numeric), 4"
             ") AS total_area FROM ("
                 "SELECT {map1_cls}, {map2_cls}, ST_Area(geom) AS g_area, "
                 "CASE "
                     "WHEN {map1_cls} = {map2_cls} "
                     "THEN 1 ELSE 0 "
                 "END AS isthesame, total_area FROM {tbl}, ("
                     "SELECT SUM(ST_Area(geom)) AS total_area FROM {tbl}"
                 ") AS foo2"
             ") AS foo WHERE isthesame = 1 "
             "GROUP BY isthesame").format(
                lulc_1=fprop(uShp[0], 'fn'), lulc_2=fprop(uShp[1], 'fn'),
                map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
                map2_cls=__SHAPES_TO_COMPARE[uShp[1]],
                tbl=union_tbl),
            api='psql')

        # Produce confusion matrix for the pair in comparison
        matrixTbl = tbl_to_area_mtx(DB, union_tbl,
                                    __SHAPES_TO_COMPARE[uShp[0]],
                                    __SHAPES_TO_COMPARE[uShp[1]],
                                    union_tbl + '_mtx')

        SYNTH_TBL[uShp] = {"TOTAL": areaMapTbl, "MATRIX": matrixTbl}

    # UNION ALL TOTAL TABLES
    total_table = tbls_to_tbl(DB, [SYNTH_TBL[k]["TOTAL"] for k in SYNTH_TBL], 'total_table')

    # Create table with % of agreement between each pair of maps
    # mapsNames: distinct map names found in lulc_1 / lulc_2, sorted;
    # they become the columns of the crosstab
    mapsNames = q_to_obj(
        DB,
        ("SELECT lulc FROM ("
             "SELECT lulc_1 AS lulc FROM {tbl} GROUP BY lulc_1 "
             "UNION ALL "
             "SELECT lulc_2 AS lulc FROM {tbl} GROUP BY lulc_2"
         ") AS lu GROUP BY lulc ORDER BY lulc").format(tbl=total_table),
        db_api='psql').lulc.tolist()

    FLDS_TO_PIVOT = ["agree_percentage", "total_area"]

    # Crosstab template: the pairs are mirrored (a-b and b-a) and joined
    # with the (map, map) diagonal, whose value is set to 0.
    # The value column is always aliased "agree_percentage", whatever
    # {valCol} is.
    Q = ("SELECT * FROM crosstab('"
         "SELECT CASE "
             "WHEN foo.lulc_1 IS NOT NULL THEN foo.lulc_1 ELSE jtbl.tmp1 "
         "END AS lulc_1, CASE "
             "WHEN foo.lulc_2 IS NOT NULL THEN foo.lulc_2 ELSE jtbl.tmp2 "
         "END AS lulc_2, CASE "
             "WHEN foo.{valCol} IS NOT NULL THEN foo.{valCol} ELSE 0 "
         "END AS agree_percentage FROM ("
             "SELECT lulc_1, lulc_2, {valCol} FROM {tbl} UNION ALL "
             "SELECT lulc_1, lulc_2, {valCol} FROM ("
                 "SELECT lulc_1 AS lulc_2, lulc_2 AS lulc_1, {valCol} "
                 "FROM {tbl}"
             ") AS tst"
         ") AS foo FULL JOIN ("
             "SELECT lulc_1 AS tmp1, lulc_2 AS tmp2 FROM ("
                 "SELECT lulc_1 AS lulc_1 FROM {tbl} GROUP BY lulc_1 "
                 "UNION ALL "
                 "SELECT lulc_2 AS lulc_1 FROM {tbl} GROUP BY lulc_2"
             ") AS tst_1, ("
                 "SELECT lulc_1 AS lulc_2 FROM {tbl} GROUP BY lulc_1 "
                 "UNION ALL "
                 "SELECT lulc_2 AS lulc_2 FROM {tbl} GROUP BY lulc_2"
             ") AS tst_2 WHERE lulc_1 = lulc_2 GROUP BY lulc_1, lulc_2"
         ") AS jtbl ON foo.lulc_1 = jtbl.tmp1 AND foo.lulc_2 = jtbl.tmp2 "
         "ORDER BY lulc_1, lulc_2"
         "') AS ct("
             "lulc_map text, {crossCols}"
         ")")

    TOTAL_AGREE_TABLE = None
    TOTAL_AREA_TABLE = None
    # First field (agree_percentage) -> "agreement_table";
    # second field (total_area) -> "area_table"
    for f in FLDS_TO_PIVOT:
        if not TOTAL_AGREE_TABLE:
            TOTAL_AGREE_TABLE = q_to_ntbl(
                DB, "agreement_table",
                Q.format(tbl=total_table, valCol=f,
                         crossCols=", ".join([
                             "{} numeric".format(map_) for map_ in mapsNames
                         ])),
                api='psql')

        else:
            TOTAL_AREA_TABLE = q_to_ntbl(DB, "area_table",
                Q.format(tbl=total_table, valCol=f,
                         crossCols=", ".join([
                             "{} numeric".format(map_) for map_ in mapsNames
                         ])),
                api='psql')

    # Union Mapping
    # Table relating each pair of inputs with the name of its union file
    UNION_MAPPING = pandas.DataFrame(
        [[k[0], k[1], fprop(UNION_SHAPE[k], 'fn')] for k in UNION_SHAPE],
        columns=['shp_a', 'shp_b', 'union_shp'])

    UNION_MAPPING = df_to_db(DB, UNION_MAPPING, 'union_map', api='psql')

    # Export Results
    # TABLES and SHEETS are parallel lists; matrix sheets are named with
    # the first 15 characters of each file name of the pair
    TABLES = [UNION_MAPPING, TOTAL_AGREE_TABLE, TOTAL_AREA_TABLE
              ] + [SYNTH_TBL[x]["MATRIX"] for x in SYNTH_TBL]

    SHEETS = ["union_map", "agreement_percentage", "area_with_data_km"] + [
        "{}_{}".format(fprop(x[0], 'fn')[:15], fprop(x[1], 'fn')[:15])
        for x in SYNTH_TBL
    ]

    db_to_tbl(DB, ["SELECT * FROM {}".format(x) for x in TABLES],
              REPORT, sheetsNames=SHEETS, dbAPI='psql')

    return REPORT
def psql_to_djgdb(sql_dumps, db_name, djg_proj=None, mapTbl=None, userDjgAPI=None):
    """
    Import PGSQL database in a SQL Script into the database
    controlled by one Django Project

    To work, the name of a model instance of type foreign key should be
    equal to the name of the 'db_column' clause.

    Parameters:
    * sql_dumps - one SQL file or several (normalized with obj_to_lst);
    * db_name - database that receives the data; the temporary database
    is named db_name + '_xxxtmp' and is dropped at the end;
    * djg_proj - Django project, given to open_Django_Proj and
    lst_mdl_proj;
    * mapTbl - tables to import; when not given, all tables of the
    temporary database (views excluded) are considered;
    * userDjgAPI - if true, rows are saved one by one through the Django
    models; otherwise the tables are written with df_to_db (append=True).
    """

    import os
    from gasp import __import
    from gasp.pyt import obj_to_lst
    from gasp.sql.to import restore_tbls
    from gasp.sql.db import create_db, drop_db
    from gasp.sql.i import lst_tbl
    from gasp.sql.fm import q_to_obj
    from gasp.web.djg.mdl.rel import order_mdl_by_rel
    from gasp.web.djg.mdl.i import lst_mdl_proj

    # Global variables
    # ('raster_columns' is listed twice)
    TABLES_TO_EXCLUDE = [
        'geography_columns', 'geometry_columns',
        'spatial_ref_sys', 'raster_columns', 'raster_columns',
        'raster_overviews', 'pointcloud_formats', 'pointcloud_columns'
    ]

    # Several SQL Files are expected
    sql_scripts = obj_to_lst(sql_dumps)

    # Create Database
    tmp_db_name = db_name + '_xxxtmp'
    create_db(tmp_db_name)

    # Restore tables in SQL files
    for sql in sql_scripts:
        restore_tbls(tmp_db_name, sql)

    # List tables in the database
    tables = [x for x in lst_tbl(tmp_db_name, excludeViews=True, api='psql')
              ] if not mapTbl else mapTbl

    # Open Django Project
    if djg_proj:
        from gasp.web.djg import open_Django_Proj
        application = open_Django_Proj(djg_proj)

    # List models in project
    # app_mdls is indexed by table name below (app_mdls[t]) to get the
    # model class name
    app_mdls = lst_mdl_proj(djg_proj, thereIsApp=True, returnClassName=True)

    # data_tbl maps the model import path (or the auth table name) to
    # the table that holds its data
    data_tbl = {}
    for t in tables:
        if t == 'auth_user' or t == 'auth_group' or t == 'auth_user_groups':
            data_tbl[t] = t

        elif t.startswith('auth') or t.startswith('django'):
            # Other Django internal tables are not imported
            continue

        elif t not in app_mdls or t in TABLES_TO_EXCLUDE:
            continue

        else:
            # The text before the first '_' of the table name is taken
            # as the application name
            data_tbl["{}.models.{}".format(t.split('_')[0], app_mdls[t])] = t

    from django.contrib.gis.db import models

    mdl_cls = [
        "{}.models.{}".format(m.split('_')[0], app_mdls[m]) for m in app_mdls
    ]
    # presumably sorts the models so that related (parent) models come
    # first - TODO confirm against order_mdl_by_rel
    orderned_table = order_mdl_by_rel(mdl_cls)

    # Add default tables of Django
    def_djg_tbl = []
    if 'auth_group' in data_tbl:
        def_djg_tbl.append('auth_group')

    if 'auth_user' in data_tbl:
        def_djg_tbl.append('auth_user')

    if 'auth_user_groups' in data_tbl:
        def_djg_tbl.append('auth_user_groups')

    orderned_table = def_djg_tbl + orderned_table

    if userDjgAPI:
        for table in orderned_table:
            # Map pgsql table data
            # NOTE(review): data_tbl[table] is a table name, while the
            # other branch sends a "SELECT * FROM ..." query; there is
            # also no "not in data_tbl" guard here as there is in the
            # other branch - confirm both are intended
            tableData = q_to_obj(tmp_db_name, data_tbl[table], of='dict')

            # Table data to Django Model
            if table == 'auth_user':
                mdl_cls = __import('django.contrib.auth.models.User')
            elif table == 'auth_group':
                mdl_cls = __import('django.contrib.auth.models.Group')
            else:
                mdl_cls = __import(table)

            # One model instance per table, reused for every row
            __mdl = mdl_cls()

            for row in tableData:
                for col in row:
                    # Check if field is a foreign key
                    field_obj = mdl_cls._meta.get_field(col)

                    if not isinstance(field_obj, models.ForeignKey):
                        # If not, use the value

                        # But first check if value is nan (special type of float)
                        if row[col] != row[col]:
                            row[col] = None

                        setattr(__mdl, col, row[col])

                    else:
                        # If yes, use the model instance of the related table
                        # Get the model of the table related to the one
                        # whose data is being restored
                        related_name = field_obj.related_model.__name__

                        related_model = __import('{a}.models.{m}'.format(
                            a=table.split('_')[0], m=related_name))

                        # If NULL, continue
                        if not row[col]:
                            setattr(__mdl, col, row[col])
                            continue

                        related_obj = related_model.objects.get(
                            pk=int(row[col]))

                        setattr(__mdl, col, related_obj)

                __mdl.save()

    else:
        import json
        import pandas as pd
        from gasp.sql.fm import q_to_obj
        from gasp.sql.to import df_to_db

        for tbl in orderned_table:
            if tbl not in data_tbl:
                continue

            data = q_to_obj(tmp_db_name, "SELECT * FROM {}".format(data_tbl[tbl]))

            if tbl == 'auth_user':
                # Both datetime columns are converted to UTC-aware
                # datetimes before writing
                data['last_login'] = pd.to_datetime(data.last_login, utc=True)
                data['date_joined'] = pd.to_datetime(data.date_joined, utc=True)

            df_to_db(db_name, data, data_tbl[tbl], append=True)

    # The temporary database is not needed anymore
    drop_db(tmp_db_name)
def run_viewshed_by_cpu(tid, db, obs, dem, srs, vis_basename='vis',
                        maxdst=None, obselevation=None):
    """
    Compute the viewshed of every observer point in obs and store the
    visible cells in a database table.

    Parameters:
    * tid - identifier of this job; used as suffix of the database name
    and of the working folder;
    * db - base name of the database ("<db>_<tid>" is created);
    * obs - DataFrame with the observer points (geometry column named
    'geometry');
    * dem - raster file with the elevation model; the working folder is
    created next to it and it defines the GRASS GIS location;
    * srs - EPSG code of obs;
    * vis_basename - name of the visibility table and prefix of the
    viewshed rasters;
    * maxdst, obselevation - forwarded to grs_viewshed (max_dist and
    obs_elv).

    The table vis_basename gets one row per visible cell (value 1 in
    the viewshed raster) with the columns pntid, rowi and coli.

    NOTE: obs_id, create_db, df_to_db, mkdir, run_grass, del_file, os,
    gdal, np and pd are not defined here; they must exist in the
    enclosing scope.
    """

    # Create Database
    new_db = create_db("{}_{}".format(db, str(tid)), api='psql')

    # Points to Database
    df_to_db(
        new_db, obs, 'pnt_tbl', api='psql',
        epsg=srs, geomType='Point', colGeom='geometry'
    )

    # Create GRASS GIS Session
    workspace = mkdir(
        os.path.join(os.path.dirname(dem), 'work_{}'.format(str(tid))))
    loc_name = 'vis_loc'

    gbase = run_grass(workspace, location=loc_name, srs=dem)

    # Start GRASS GIS Session
    import grass.script as grass
    import grass.script.setup as gsetup
    gsetup.init(gbase, workspace, loc_name, 'PERMANENT')

    from gasp.gt.torst import rst_to_grs, grs_to_rst
    from gasp.gt.nop.surf import grs_viewshed
    from gasp.gt.deldt import del_rst

    # Send DEM to GRASS GIS
    grs_dem = rst_to_grs(dem, 'grs_dem', as_cmd=True)

    # Produce Viewshed for each point in obs
    for idx, row in obs.iterrows():
        # Get Viewshed raster
        vrst = grs_viewshed(
            grs_dem, (row.geometry.x, row.geometry.y),
            '{}_{}'.format(vis_basename, str(row[obs_id])),
            max_dist=maxdst, obs_elv=obselevation
        )

        # Export Raster to File
        frst = grs_to_rst(vrst, os.path.join(workspace, vrst + '.tif'))

        # Raster to Array
        img = gdal.Open(frst)
        num = img.ReadAsArray()

        # Row and column indexes of the cells with visibility.
        # np.nonzero returns them in row-major order; deriving the row
        # from a flat index requires dividing by the number of COLUMNS
        # (shape[1]), which np.nonzero does implicitly.
        visrow, viscol = np.nonzero(num == 1)
        visrow = visrow.astype(np.uint32)
        viscol = viscol.astype(np.uint32)

        # Visibility indexes to Pandas DataFrame
        idxnum = np.full(visrow.shape, row[obs_id])
        visdf = pd.DataFrame({
            'pntid': idxnum, 'rowi': visrow, 'coli': viscol
        })

        # Pandas DF to database
        # Create Visibility table (append when idx is not falsy)
        df_to_db(
            new_db, visdf, vis_basename,
            api='psql', colGeom=None,
            append=None if not idx else True
        )

        # Release arrays and the GDAL dataset before the next point
        del num, visrow, viscol, idxnum, visdf
        del img

        # Delete GRASS GIS File
        del_rst(vrst)

        # Delete TIFF File
        del_file(frst)
        frst = None
def dsnsearch_by_cell(GRID_PNT, EPSG, RADIUS, DATA_SOURCE, db, OUTPUT_TABLE):
    """
    Search for data in DSN and other platforms by cell

    Parameters:
    * GRID_PNT - file with the points of the grid; one search is made
    around each point;
    * EPSG - EPSG code of GRID_PNT (reprojected to 4326 for the search)
    and of the output;
    * RADIUS - search radius sent to the data source;
    * DATA_SOURCE - only 'facebook' is implemented;
    * db - database to be created (overwrite=True);
    * OUTPUT_TABLE - output file exported with dbtbl_to_shp.

    Results found from several cells are grouped in one row whose
    grid_id column lists the cells, as text.

    Returns OUTPUT_TABLE.

    Raises ValueError if DATA_SOURCE is not a valid data source (only
    detected when the first cell is searched).
    """

    import time
    from gasp.gt.fmshp import shp_to_obj
    from gasp.sql.db import create_db
    from gasp.sde.dsn.fb.places import places_by_query
    from gasp.g.prj import df_prj
    from gasp.pyt.df.to import merge_df
    from gasp.gt.toshp.db import dbtbl_to_shp
    from gasp.sql.to import q_to_ntbl
    from gasp.sql.to import df_to_db

    # Open GRID SHP
    GRID_DF = shp_to_obj(GRID_PNT)
    GRID_DF = df_prj(GRID_DF, 4326) if EPSG != 4326 else GRID_DF

    GRID_DF["lng"] = GRID_DF.geometry.x.astype(float)
    GRID_DF["lat"] = GRID_DF.geometry.y.astype(float)
    GRID_DF["grid_id"] = GRID_DF.index

    # GET DATA
    RESULTS = []

    def get_data(row, datasrc):
        """Search around one grid point and keep the result."""
        if datasrc == 'facebook':
            d = places_by_query(
                {'x': row.lng, 'y': row.lat, 'r': RADIUS}, 4326,
                keyword=None, epsgOut=EPSG, _limit='100',
                onlySearchAreaContained=None
            )

        else:
            raise ValueError(
                '{} as datasource is not a valid value'.format(datasrc))

        # An integer is returned instead of data when there is nothing
        # to keep - presumably a status code, confirm in places_by_query
        if isinstance(d, int):
            return

        d['grid_id'] = row.grid_id

        RESULTS.append(d)

        # Wait between requests
        time.sleep(5)

    GRID_DF.apply(lambda x: get_data(x, DATA_SOURCE), axis=1)

    RT = merge_df(RESULTS)

    # Create DB
    create_db(db, overwrite=True, api='psql')

    # Send Data to PostgreSQL
    # EPSG and geometry type go by keyword: other calls of df_to_db in
    # this file show append and api, not these two, after the table name
    df_to_db(
        db, RT, "{}_data".format(DATA_SOURCE),
        api='psql', epsg=EPSG, geomType="POINT",
        colGeom='geometry' if 'geometry' in RT.columns.values else 'geom'
    )

    # Every attribute except the geometry and grid_id, plus "geom"
    COLS = [
        x for x in RT.columns.values
        if x != "geometry" and x != 'geom' and x != "grid_id"
    ] + ["geom"]

    GRP_BY_TBL = q_to_ntbl(db, "{}_grpby".format(DATA_SOURCE), (
        "SELECT {cols}, CAST(array_agg(grid_id) AS text) AS grid_id "
        "FROM {dtsrc}_data GROUP BY {cols}"
    ).format(cols=", ".join(COLS), dtsrc=DATA_SOURCE), api='psql')

    dbtbl_to_shp(
        db, GRP_BY_TBL, "geom", OUTPUT_TABLE, api="psql", epsg=EPSG
    )

    return OUTPUT_TABLE
def vector_based(osmdata, nomenclature, refRaster, lulcShp,
                 overwrite=None, dataStore=None, RoadsAPI='POSTGIS'):
    """
    Convert OSM Data into Land Use/Land Cover Information

    A vector based approach.

    TODO: Add a detailed description.

    Parameters:
    * osmdata - path to the file with OSM data (must exist);
    * nomenclature - "URBAN_ATLAS", "CORINE_LAND_COVER" or
    "GLOBE_LAND_30"; any other value falls back to "URBAN_ATLAS";
    * refRaster - path to the file with the reference area (must
    exist);
    * lulcShp - path to the output file; its folder must exist and its
    extension is forced to .shp;
    * overwrite - if truthy, an existing workspace folder is handed to
    mkdir again; otherwise an existing workspace raises ValueError;
    * dataStore - folder for temporary files; by default the folder
    'osmtolulc' next to lulcShp;
    * RoadsAPI - see options below.

    RoadsAPI Options:
    * GRASS
    * SQLITE
    * POSTGIS

    Returns a tuple (lulcShp, times) where times is a dict with the
    time spent in each step (entries of skipped rules are None).

    Raises ValueError if an input path does not exist or if the
    workspace exists and overwrite is not set.
    """

    # ***** Python Modules from Reference Packages ***** #
    import datetime
    import os
    import copy

    # ***** GASP dependencies ***** #
    from gasp.pyt.oss import fprop, mkdir
    from gasp.gt.wenv.grs import run_grass

    if RoadsAPI == 'POSTGIS':
        from gasp.sql.db import create_db
        from gasp.gql.to.osm import osm_to_psql
        from gasp.sql.db import drop_db
        from gasp.sql.fm import dump_db
    else:
        from gasp.gt.toshp.osm import osm_to_sqdb

    from gasp.sds.osm2lulc.utils import osm_project, add_lulc_to_osmfeat, get_ref_raster
    from gasp.gt.toshp.mtos import shps_to_shp
    from gasp.sds.osm2lulc.mod1 import grs_vector

    # Roads are extracted in the database for SQLITE and POSTGIS and
    # with GRASS GIS otherwise
    roads_in_db = RoadsAPI == 'SQLITE' or RoadsAPI == 'POSTGIS'
    if roads_in_db:
        from gasp.sds.osm2lulc.mod2 import roads_sqdb
    else:
        from gasp.sds.osm2lulc.mod2 import grs_vec_roads

    from gasp.sds.osm2lulc.m3_4 import grs_vect_selbyarea
    from gasp.sds.osm2lulc.mod5 import grs_vect_bbuffer
    from gasp.sds.osm2lulc.mod6 import vector_assign_pntags_to_build
    from gasp.gt.toshp.mtos import same_attr_to_shp
    from gasp.gt.prj import def_prj

    def _now():
        """Current time without microseconds (used for the timings)."""
        return datetime.datetime.now().replace(microsecond=0)

    # ***** Global Settings ***** #
    # Check if input parameters exists!
    if not os.path.exists(os.path.dirname(lulcShp)):
        raise ValueError('{} does not exist!'.format(os.path.dirname(lulcShp)))

    if not os.path.exists(osmdata):
        raise ValueError(
            'File with OSM DATA ({}) does not exist!'.format(osmdata))

    if not os.path.exists(refRaster):
        raise ValueError(
            'File with reference area ({}) does not exist!'.format(refRaster))

    # Check if Nomenclature is valid
    # Unknown values fall back to URBAN_ATLAS. The previous test used
    # == "GLOBE_LAND_30", which replaced that valid nomenclature by
    # URBAN_ATLAS and let invalid values through.
    if nomenclature not in (
            "URBAN_ATLAS", "CORINE_LAND_COVER", "GLOBE_LAND_30"):
        nomenclature = "URBAN_ATLAS"

    is_globe = nomenclature == "GLOBE_LAND_30"

    # API name given to the helpers that only know SQLITE and POSTGIS
    db_api = 'SQLITE' if RoadsAPI != 'POSTGIS' else RoadsAPI

    time_a = _now()

    # Create workspace for temporary files
    workspace = os.path.join(
        os.path.dirname(lulcShp), 'osmtolulc') if not dataStore else dataStore

    # Check if workspace exists
    if os.path.exists(workspace) and not overwrite:
        raise ValueError('Path {} already exists'.format(workspace))
    mkdir(workspace)

    # Get Reference Raster
    refRaster, epsg = get_ref_raster(refRaster, workspace, cellsize=10)

    from gasp.sds.osm2lulc import osmTableData, PRIORITIES, LEGEND

    priorities = PRIORITIES[nomenclature]
    legend = LEGEND[nomenclature]

    time_b = _now()

    if RoadsAPI != 'POSTGIS':
        # Convert OSM file to SQLITE DB
        osm_db = osm_to_sqdb(osmdata, os.path.join(workspace, 'osm.sqlite'))
    else:
        # Convert OSM file to POSTGRESQL DB
        osm_db = create_db(
            fprop(osmdata, 'fn', forceLower=True), overwrite=True)
        osm_db = osm_to_psql(osmdata, osm_db)

    time_c = _now()

    # ***** Add Lulc Classes to OSM_FEATURES by rule ***** #
    add_lulc_to_osmfeat(osm_db, osmTableData, nomenclature, api=db_api)

    time_d = _now()

    # ***** Transform SRS of OSM Data ***** #
    osmTableData = osm_project(
        osm_db, epsg, api=db_api,
        isGlobeLand=None if not is_globe else True
    )

    time_e = _now()

    # ***** Start a GRASS GIS Session ***** #
    grass_base = run_grass(
        workspace, grassBIN='grass78', location='grloc', srs=epsg)

    import grass.script.setup as gsetup
    gsetup.init(grass_base, workspace, 'grloc', 'PERMANENT')

    # ***** IMPORT SOME GASP MODULES FOR GRASS GIS ***** #
    from gasp.gt.gop.ovlay import erase
    from gasp.gt.wenv.grs import rst_to_region
    from gasp.gt.gop.genze import dissolve
    from gasp.gt.tbl.grs import add_and_update, reset_table, update_table
    from gasp.gt.tbl.fld import add_fields
    from gasp.gt.toshp.cff import shp_to_grs, grs_to_shp
    from gasp.gt.torst import rst_to_grs

    # ***** SET GRASS GIS LOCATION EXTENT ***** #
    extRst = rst_to_grs(refRaster, 'extent_raster')
    rst_to_region(extRst)

    time_f = _now()

    # ***** MapResults ***** #
    osmShps = []

    # ***** 1 - Selection Rule ***** #
    ruleOneShp, timeCheck1 = grs_vector(
        osm_db, osmTableData['polygons'], apidb=RoadsAPI)
    osmShps.append(ruleOneShp)

    time_g = _now()

    # ***** 2 - Get Information About Roads Location ***** #
    if roads_in_db:
        ruleRowShp, timeCheck2 = roads_sqdb(
            osm_db, osmTableData['lines'], osmTableData['polygons'],
            apidb=RoadsAPI
        )
    else:
        ruleRowShp, timeCheck2 = grs_vec_roads(
            osm_db, osmTableData['lines'], osmTableData['polygons'])
    osmShps.append(ruleRowShp)

    time_h = _now()

    # ***** 3 - Area Upper than ***** #
    if not is_globe:
        ruleThreeShp, timeCheck3 = grs_vect_selbyarea(
            osm_db, osmTableData['polygons'], UPPER=True, apidb=RoadsAPI)
        osmShps.append(ruleThreeShp)
        time_l = _now()
    else:
        timeCheck3 = None
        time_l = None

    # ***** 4 - Area Lower than ***** #
    if not is_globe:
        ruleFourShp, timeCheck4 = grs_vect_selbyarea(
            osm_db, osmTableData['polygons'], UPPER=False, apidb=RoadsAPI)
        osmShps.append(ruleFourShp)
        time_j = _now()
    else:
        timeCheck4 = None
        time_j = None

    # ***** 5 - Get data from lines table (railway | waterway) ***** #
    ruleFiveShp, timeCheck5 = grs_vect_bbuffer(
        osm_db, osmTableData["lines"], api_db=RoadsAPI)
    osmShps.append(ruleFiveShp)

    time_m = _now()

    # ***** 7 - Assign untagged Buildings to tags ***** #
    if not is_globe:
        ruleSeven11, ruleSeven12, timeCheck7 = vector_assign_pntags_to_build(
            osm_db, osmTableData['points'], osmTableData['polygons'],
            apidb=RoadsAPI
        )

        if ruleSeven11:
            osmShps.append(ruleSeven11)

        if ruleSeven12:
            osmShps.append(ruleSeven12)
    else:
        timeCheck7 = None

    time_n = _now()

    # ***** Produce LULC Map ***** #
    # Get Shps with all geometries related with one class - one shape
    # per class. Rules that produced nothing are ignored.
    _osmShps = [
        grs_to_shp(
            grs_vec, os.path.join(workspace, grs_vec + '.shp'),
            'auto', lyrN=1, asCMD=True, asMultiPart=None
        ) for grs_vec in osmShps if grs_vec
    ]

    for shp in _osmShps:
        def_prj(os.path.splitext(shp)[0] + '.prj', epsg=epsg, api='epsgio')

    _osmShps = same_attr_to_shp(
        _osmShps, "cat", workspace, "osm_", resultDict=True)
    del osmShps

    time_o = _now()

    # Merge all Classes into one feature class using a priority rule
    osmShps = {}
    for cls in _osmShps:
        if cls == '1':
            # Category '1' is stored as class 1221
            osmShps[1221] = shp_to_grs(_osmShps[cls], "osm_1221", asCMD=True)
        else:
            osmShps[int(cls)] = shp_to_grs(
                _osmShps[cls], "osm_" + cls, asCMD=True)

    # Erase overlapping areas by priority: each class is erased from
    # every class that comes after it in the priority list. The names
    # of the results are built from the names before any erase.
    osmNameRef = copy.deepcopy(osmShps)

    for e in range(len(priorities) - 1):
        if priorities[e] not in osmShps:
            continue

        for i in range(e + 1, len(priorities)):
            if priorities[i] not in osmShps:
                continue

            osmShps[priorities[i]] = erase(
                osmShps[priorities[i]], osmShps[priorities[e]],
                "{}_{}".format(osmNameRef[priorities[i]], e),
                notTbl=True, api='pygrass'
            )

    time_p = _now()

    # Export all classes
    lst_merge = []
    first_cls = True
    for cls in priorities:
        if cls not in osmShps:
            continue

        if first_cls:
            # The first exported class gets its table reset with both
            # fields; the others only receive the class code
            reset_table(osmShps[cls], {
                'cls': 'varchar(5)', 'leg': 'varchar(75)'
            }, {
                'cls': str(cls), 'leg': str(legend[cls])
            })
            first_cls = False
        else:
            add_and_update(
                osmShps[cls], {'cls': 'varchar(5)'}, {'cls': str(cls)})

        ds = dissolve(
            osmShps[cls], 'dl_{}'.format(str(cls)), 'cls', api="grass")

        add_fields(ds, {'leg': 'varchar(75)'}, api="grass")
        update_table(ds, 'leg', str(legend[cls]), 'leg is null')

        lst_merge.append(grs_to_shp(
            ds, os.path.join(workspace, "lulc_{}.shp".format(str(cls))),
            'auto', lyrN=1, asCMD=True, asMultiPart=None
        ))

    time_q = _now()

    # The output is always written as ESRI Shapefile
    if fprop(lulcShp, 'ff') != '.shp':
        lulcShp = os.path.join(
            os.path.dirname(lulcShp), fprop(lulcShp, 'fn') + '.shp')

    shps_to_shp(lst_merge, lulcShp, api='pandas')

    # Check if prj of lulcShp exists and create it if necessary
    prj_ff = os.path.splitext(lulcShp)[0] + '.prj'
    if not os.path.exists(prj_ff):
        def_prj(prj_ff, epsg=epsg, api='epsgio')

    time_r = _now()

    # Dump Database if PostGIS was used
    # Drop Database if PostGIS was used
    if RoadsAPI == 'POSTGIS':
        dump_db(osm_db, os.path.join(workspace, osm_db + '.sql'), api='psql')
        drop_db(osm_db)

    return lulcShp, {
        0: ('set_settings', time_b - time_a),
        1: ('osm_to_sqdb', time_c - time_b),
        2: ('cls_in_sqdb', time_d - time_c),
        3: ('proj_data', time_e - time_d),
        4: ('set_grass', time_f - time_e),
        5: ('rule_1', time_g - time_f, timeCheck1),
        6: ('rule_2', time_h - time_g, timeCheck2),
        7: None if not timeCheck3 else (
            'rule_3', time_l - time_h, timeCheck3),
        8: None if not timeCheck4 else (
            'rule_4', time_j - time_l, timeCheck4),
        9: ('rule_5',
            time_m - time_j if timeCheck4 else time_m - time_h,
            timeCheck5),
        10: None if not timeCheck7 else (
            'rule_7', time_n - time_m, timeCheck7),
        11: ('disj_cls', time_o - time_n),
        12: ('priority_rule', time_p - time_o),
        13: ('export_cls', time_q - time_p),
        14: ('merge_cls', time_r - time_q)
    }
def dsn_data_collection_by_multibuffer(inBuffers, workspace, db, datasource,
                                       keywords=None):
    """
    Extract Digital Social Network Data for each sub-buffer in buffer.

    A sub-buffer is a buffer with a radius equals to the main buffer
    radius / 2 and with a central point at North, South, East, West,
    Northeast, Northwest, Southwest and Southeast of the main buffer
    central point.

    inBuffers = {
        "lisbon" : {
            'x'      : -89004.994779, # in meters
            'y'      : -102815.866054, # in meters
            'radius' : 10000,
            'epsg'   : 3763
        },
        "london" : {
            'x'      : -14210.551441, # in meters
            'y'      : 6711542.47559, # in meters
            'radius' : 10000,
            'epsg'   : 3857
        }
    }
    or
    inBuffers = {
        "lisbon" : {
            "path" : "/path/to/file.shp",
            "epsg" : 3763
        }
    }

    keywords = ['flood', 'accident', 'fire apartment', 'graffiti', 'homeless']

    datasource = 'facebook' or datasource = 'flickr'
    TODO: Only works for Flickr and Facebook

    workspace is the folder where the buffers and result shapefiles
    are written; db is the name of the PostgreSQL database, which is
    created with overwrite=True.

    Returns inBuffers, updated in place: each city receives the keys
    "list_buffer", "data", "table", "pg_buffer", "filter_table" and
    "outside_table". A city for which no data was retrieved only
    receives "list_buffer" and "data" (None) and is skipped.

    Raises ValueError if datasource is not 'flickr' or 'facebook'.
    """

    # Reject an unknown data source before the database is overwritten
    if datasource not in ('flickr', 'facebook'):
        raise ValueError(
            '{} as datasource is not a valid value'.format(datasource))

    import os
    import pandas as pd
    from osgeo import ogr
    from gasp.pyt import obj_to_lst
    from gasp.sql.db import create_db
    from gasp.sql.to import q_to_ntbl
    from gasp.sql.to import df_to_db
    from gasp.gql.to import shp_to_psql
    from gasp.gt.toshp import df_to_shp
    from gasp.gt.toshp.db import dbtbl_to_shp
    from gasp.gt.prox.bf import get_sub_buffers, dic_buffer_array_to_shp

    if datasource == 'flickr':
        from gasp.sde.dsn.flickr import photos_location
    else:
        from gasp.sde.dsn.fb.places import places_by_query

    keywords = obj_to_lst(keywords)
    # "None" is the placeholder for a search without keyword
    keywords = ["None"] if not keywords else keywords

    # Create Database to Store Data
    create_db(db, overwrite=True, api='psql')

    for city in inBuffers:
        # Same dict object as inBuffers[city]; changes are visible to
        # the caller
        cfg = inBuffers[city]
        epsg = cfg["epsg"]

        # Get Smaller Buffers
        if "path" in cfg:
            # Get X, Y and Radius
            from gasp.gt.prop.feat.bf import bf_prop

            __bfprop = bf_prop(cfg["path"], epsg, isFile=True)

            cfg["x"] = __bfprop["X"]
            cfg["y"] = __bfprop["Y"]
            cfg["radius"] = __bfprop["R"]

        cfg["list_buffer"] = [{
            'X' : cfg["x"], 'Y' : cfg["y"],
            'RADIUS' : cfg['radius'], 'cardeal' : 'major'
        }] + get_sub_buffers(cfg["x"], cfg["y"], cfg["radius"])

        # Smaller Buffers to File
        multiBuffer = os.path.join(workspace, 'buffers_{}.shp'.format(city))
        dic_buffer_array_to_shp(
            cfg["list_buffer"], multiBuffer, epsg,
            fields={'cardeal' : ogr.OFTString}
        )

        # Retrieve data for each keyword and buffer
        # Record these elements in one dataframe
        frames = []
        for bf in cfg["list_buffer"]:
            for k in keywords:
                kw = k if k != 'None' else None

                if datasource == 'flickr':
                    tmpData = photos_location(
                        bf, epsg, keyword=kw, epsg_out=epsg,
                        onlySearchAreaContained=False
                    )
                else:
                    tmpData = places_by_query(
                        bf, epsg, keyword=kw, epsgOut=epsg,
                        onlySearchAreaContained=False
                    )

                # An integer is returned when nothing was found
                if isinstance(tmpData, int):
                    print("NoData finded for buffer '{}' and keyword '{}'".format(
                        bf['cardeal'], k
                    ))
                    continue

                tmpData["keyword"] = k
                tmpData["buffer_or"] = bf["cardeal"]

                frames.append(tmpData)

        if not frames:
            # Nothing to store or analyse for this city
            print("NoData finded for '{}'; city skipped".format(city))
            cfg["data"] = None
            continue

        # DataFrame.append was removed in pandas 2.0; concat gives the
        # same table. A single result is kept as returned.
        cfg["data"] = frames[0] if len(frames) == 1 else pd.concat(
            frames, ignore_index=True)

        # Get data columns names
        cols = cfg["data"].columns.values
        dataColumns = [
            c for c in cols
            if c not in ('geom', 'keyword', 'buffer_or', 'geometry')
        ]
        colsSql = ", ".join(dataColumns)

        # Send data to PostgreSQL
        cgeom = 'geometry' if 'geometry' in cols else 'geom'

        cfg["table"] = 'tbldata_{}'.format(city)

        df_to_db(
            db, cfg["data"], cfg["table"], api='psql',
            epsg=epsg, geomType='POINT', colGeom=cgeom
        )

        # Send Buffers data to PostgreSQL
        cfg["pg_buffer"] = shp_to_psql(
            db, multiBuffer, pgTable='buffers_{}'.format(city),
            api="shp2pgsql", srsEpsgCode=epsg
        )

        # Points and the buffers they intersect
        cfg["filter_table"] = q_to_ntbl(
            db, "filter_{}".format(cfg["table"]), (
                "SELECT srcdata.*, "
                "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) "
                "AS intersect_buffer FROM ("
                    "SELECT {cols}, keyword, geom, "
                    "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer "
                    "FROM {pgtable} "
                    "GROUP BY {cols}, keyword, geom"
                ") AS srcdata, ("
                    "SELECT cardeal, geom AS bfg FROM {bftable}"
                ") AS buffersg "
                "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS TRUE "
                "GROUP BY {cols}, keyword, geom, extracted_buffer"
            ).format(
                cols=colsSql,
                pgtable=cfg["table"],
                bftable=cfg["pg_buffer"]
            ), api='psql'
        )

        # Points that do not intersect any of the 9 buffers
        cfg["outside_table"] = q_to_ntbl(
            db, "outside_{}".format(cfg["table"]), (
                "SELECT * FROM ("
                "SELECT srcdata.*, "
                "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) "
                "AS not_intersect_buffer FROM ("
                    "SELECT {cols}, keyword, geom, "
                    "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer "
                    "FROM {pgtable} "
                    "GROUP BY {cols}, keyword, geom"
                ") AS srcdata, ("
                    "SELECT cardeal, geom AS bfg FROM {bftable}"
                ") AS buffersg "
                "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS NOT TRUE "
                "GROUP BY {cols}, keyword, geom, extracted_buffer"
                ") AS foo WHERE array_length(not_intersect_buffer, 1) = 9"
            ).format(
                cols=colsSql,
                pgtable=cfg["table"],
                bftable=cfg["pg_buffer"]
            ), api='psql'
        )

        # Union these two tables
        cfg["table"] = q_to_ntbl(db, "data_{}".format(city), (
            "SELECT * FROM {intbl} UNION ALL "
            "SELECT {cols}, keyword, geom, extracted_buffer, "
            "CASE WHEN array_length(not_intersect_buffer, 1) = 9 "
            "THEN '{array_symbol}' ELSE not_intersect_buffer END AS "
            "intersect_buffer FROM {outbl}"
        ).format(
            intbl=cfg["filter_table"],
            outbl=cfg["outside_table"],
            cols=colsSql,
            array_symbol='{' + '}'
        ), api='psql')

        # Get Buffers table with info related:
        # -> pnt_obtidos = nr of points obtained using that buffer
        # -> pnt_obtidos_fora = nr of points obtained with that buffer
        #    but located outside of it
        # -> pnt_intersect = nr of points that intersect the buffer
        # -> pnt_intersect_non_obtain = nr of points that intersect the
        #    buffer but were not obtained with it
        cfg["pg_buffer"] = q_to_ntbl(
            db, "dt_{}".format(cfg["pg_buffer"]), (
                "SELECT main.*, get_obtidos.pnt_obtidos, "
                "obtidos_fora.pnt_obtidos_fora, intersecting.pnt_intersect, "
                "int_not_obtained.pnt_intersect_non_obtain "
                "FROM {bf_table} AS main "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.extracted_buffer) "
                    "GROUP BY gid, cardeal"
                ") AS get_obtidos ON main.gid = get_obtidos.gid "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos_fora "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.extracted_buffer) "
                    "WHERE ST_Intersects(bf.geom, dt.geom) IS NOT TRUE "
                    "GROUP BY gid, cardeal"
                ") AS obtidos_fora ON main.gid = obtidos_fora.gid "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.intersect_buffer) "
                    "GROUP BY gid, cardeal"
                ") AS intersecting ON main.gid = intersecting.gid "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect_non_obtain "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.intersect_buffer) "
                    "WHERE NOT (bf.cardeal = ANY(dt.extracted_buffer)) "
                    "GROUP BY gid, cardeal"
                ") AS int_not_obtained "
                "ON main.gid = int_not_obtained.gid "
                "ORDER BY main.gid"
            ).format(
                bf_table=cfg["pg_buffer"],
                dt_table=cfg["table"]
            ), api='psql'
        )

        # Get Points table with info related:
        # -> nobtido = nr of times a point was obtained
        # -> obtido_e_intersect = nr of times a point was obtained
        #    using a buffer it intersects
        # -> obtido_sem_intersect = nr of times a point was obtained
        #    using a buffer it does not intersect
        # -> nintersect = nr of buffers a point intersects
        # -> intersect_sem_obtido = nr of times a point was not
        #    obtained despite intersecting the buffer
        cfg["table"] = q_to_ntbl(
            db, "info_{}".format(city), (
                "SELECT {cols}, dt.keyword, dt.geom, "
                "CAST(dt.extracted_buffer AS text) AS extracted_buffer, "
                "CAST(dt.intersect_buffer AS text) AS intersect_buffer, "
                "array_length(extracted_buffer, 1) AS nobtido, "
                "SUM(CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                "THEN 1 ELSE 0 END) AS obtido_e_intersect, "
                "(array_length(extracted_buffer, 1) - SUM("
                "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                "THEN 1 ELSE 0 END)) AS obtido_sem_intersect, "
                "array_length(intersect_buffer, 1) AS nintersect, "
                "(array_length(intersect_buffer, 1) - SUM("
                "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                "THEN 1 ELSE 0 END)) AS intersect_sem_obtido "
                "FROM {dt_table} AS dt "
                "INNER JOIN {bf_table} AS bf "
                "ON bf.cardeal = ANY(dt.extracted_buffer) "
                "GROUP BY {cols}, dt.keyword, dt.geom, "
                "dt.extracted_buffer, dt.intersect_buffer"
            ).format(
                dt_table=cfg["table"],
                bf_table=cfg["pg_buffer"],
                cols=", ".join(["dt.{}".format(x) for x in dataColumns])
            ), api='psql'
        )

        # Export Results
        for tbl_key in ("table", "pg_buffer"):
            dbtbl_to_shp(
                db, cfg[tbl_key], 'geom',
                os.path.join(workspace, "{}.shp".format(cfg[tbl_key])),
                api='psql', epsg=epsg
            )

    return inBuffers
def raster_based(osmdata, nomenclature, refRaster, lulcRst,
                 overwrite=None, dataStore=None, roadsAPI='POSTGIS'):
    """
    Convert OSM Data into Land Use/Land Cover Information

    A raster based approach.

    TODO: Add detailed description

    Parameters:
    * osmdata - path to the file with OSM data (must exist);
    * nomenclature - "URBAN_ATLAS", "CORINE_LAND_COVER" or
    "GLOBE_LAND_30"; any other value falls back to "URBAN_ATLAS";
    * refRaster - path to the file with the reference area (must
    exist);
    * lulcRst - path to the output raster; its folder must exist and,
    if check_isRaster does not accept it, the extension is replaced
    by .tif;
    * overwrite - if truthy, an existing workspace folder is handed to
    mkdir again; otherwise an existing workspace raises ValueError;
    * dataStore - folder for temporary files; by default the folder
    'osmtolulc' next to lulcRst;
    * roadsAPI - 'POSTGIS' uses a PostgreSQL database; any other value
    uses a SQLite database and GRASS GIS for the roads.

    Returns a tuple (lulcRst, times) where times is a dict with the
    time spent in each step (entries of skipped rules are None).

    Raises ValueError if an input path does not exist or if the
    workspace exists and overwrite is not set.
    """

    # ***** Python Modules from Reference Packages ***** #
    import datetime
    import os
    import pandas
    import copy

    # ***** Gasp dependencies ***** #
    from gasp.pyt.oss import mkdir, fprop
    from gasp.gt.prop.ff import check_isRaster
    from gasp.gt.prop.prj import get_rst_epsg
    from gasp.gt.wenv.grs import run_grass

    if roadsAPI == 'POSTGIS':
        from gasp.sql.db import create_db
        from gasp.gql.to.osm import osm_to_psql
        from gasp.sds.osm2lulc.mod2 import roads_sqdb
        from gasp.sql.fm import dump_db
        from gasp.sql.db import drop_db
    else:
        from gasp.gt.toshp.osm import osm_to_sqdb
        from gasp.sds.osm2lulc.mod2 import grs_rst_roads

    from gasp.sds.osm2lulc.utils import osm_project, add_lulc_to_osmfeat, osmlulc_rsttbl
    from gasp.sds.osm2lulc.utils import get_ref_raster
    from gasp.sds.osm2lulc.mod1 import grs_rst
    from gasp.sds.osm2lulc.m3_4 import rst_area
    from gasp.sds.osm2lulc.mod5 import basic_buffer
    from gasp.sds.osm2lulc.mod6 import rst_pnt_to_build

    def _now():
        """Current time without microseconds (used for the timings)."""
        return datetime.datetime.now().replace(microsecond=0)

    # ***** Global Settings ***** #
    # Check if input parameters exists!
    if not os.path.exists(os.path.dirname(lulcRst)):
        raise ValueError('{} does not exist!'.format(os.path.dirname(lulcRst)))

    if not os.path.exists(osmdata):
        raise ValueError(
            'File with OSM DATA ({}) does not exist!'.format(osmdata))

    if not os.path.exists(refRaster):
        raise ValueError(
            'File with reference area ({}) does not exist!'.format(refRaster))

    # Check if Nomenclature is valid
    # Unknown values fall back to URBAN_ATLAS. The previous test used
    # == "GLOBE_LAND_30", which replaced that valid nomenclature by
    # URBAN_ATLAS and let invalid values through.
    if nomenclature not in (
            "URBAN_ATLAS", "CORINE_LAND_COVER", "GLOBE_LAND_30"):
        nomenclature = "URBAN_ATLAS"

    is_globe = nomenclature == "GLOBE_LAND_30"

    time_a = _now()

    workspace = os.path.join(
        os.path.dirname(lulcRst), 'osmtolulc') if not dataStore else dataStore

    # Check if workspace exists
    if os.path.exists(workspace) and not overwrite:
        raise ValueError('Path {} already exists'.format(workspace))
    mkdir(workspace)

    # Get Ref Raster
    refRaster, epsg = get_ref_raster(refRaster, workspace, cellsize=2)

    from gasp.sds.osm2lulc import PRIORITIES, osmTableData, LEGEND

    __legend = LEGEND[nomenclature]

    time_b = _now()

    # ***** Convert OSM file to SQLITE DB or to POSTGIS DB ***** #
    if roadsAPI == 'POSTGIS':
        osm_db = create_db(
            fprop(osmdata, 'fn', forceLower=True), overwrite=True)
        osm_db = osm_to_psql(osmdata, osm_db)
    else:
        osm_db = osm_to_sqdb(osmdata, os.path.join(workspace, 'osm.sqlite'))

    time_c = _now()

    # ***** Add Lulc Classes to OSM_FEATURES by rule ***** #
    add_lulc_to_osmfeat(osm_db, osmTableData, nomenclature, api=roadsAPI)

    time_d = _now()

    # ***** Transform SRS of OSM Data ***** #
    osmTableData = osm_project(
        osm_db, epsg, api=roadsAPI,
        isGlobeLand=None if not is_globe else True
    )

    time_e = _now()

    # ***** Start a GRASS GIS Session ***** #
    grass_base = run_grass(
        workspace, grassBIN='grass78', location='grloc', srs=epsg)

    import grass.script as grass
    import grass.script.setup as gsetup
    gsetup.init(grass_base, workspace, 'grloc', 'PERMANENT')

    # ***** IMPORT SOME GASP MODULES FOR GRASS GIS ***** #
    from gasp.gt.torst import rst_to_grs, grs_to_rst
    from gasp.gt.nop.mos import rsts_to_mosaic
    from gasp.gt.wenv.grs import rst_to_region

    # ***** SET GRASS GIS LOCATION EXTENT ***** #
    extRst = rst_to_grs(refRaster, 'extent_raster')
    rst_to_region(extRst)

    time_f = _now()

    # ***** MapResults ***** #
    # mergeOut = {cls_code : [rst_name, rst_name, ...], ...}
    mergeOut = {}

    # ***** 1 - Selection Rule ***** #
    # selOut = {cls_code : rst_name, ...}
    selOut, timeCheck1 = grs_rst(
        osm_db, osmTableData['polygons'], api=roadsAPI)

    for cls in selOut:
        mergeOut[cls] = [selOut[cls]]

    time_g = _now()

    # ***** 2 - Get Information About Roads Location ***** #
    # roads = {cls_code : rst_name, ...}
    roadCls = 1221 if not is_globe else 801

    if roadsAPI != 'POSTGIS':
        roads, timeCheck2 = grs_rst_roads(
            osm_db, osmTableData['lines'], osmTableData['polygons'],
            workspace, roadCls
        )
    else:
        roads, timeCheck2 = roads_sqdb(
            osm_db, osmTableData['lines'], osmTableData['polygons'],
            apidb='POSTGIS', asRst=roadCls
        )
        roads = {roadCls: roads}

    for cls in roads:
        mergeOut.setdefault(cls, []).append(roads[cls])

    time_h = _now()

    # ***** 3 - Area Upper than ***** #
    # auOut = {cls_code : rst_name, ...}
    if not is_globe:
        auOut, timeCheck3 = rst_area(
            osm_db, osmTableData['polygons'], UPPER=True, api=roadsAPI)

        for cls in auOut:
            mergeOut.setdefault(cls, []).append(auOut[cls])

        time_l = _now()
    else:
        timeCheck3 = None
        time_l = None

    # ***** 4 - Area Lower than ***** #
    # alOut = {cls_code : rst_name, ...}
    if not is_globe:
        alOut, timeCheck4 = rst_area(
            osm_db, osmTableData['polygons'], UPPER=None, api=roadsAPI)

        for cls in alOut:
            mergeOut.setdefault(cls, []).append(alOut[cls])

        time_j = _now()
    else:
        timeCheck4 = None
        time_j = None

    # ***** 5 - Get data from lines table (railway | waterway) ***** #
    # bfOut = {cls_code : rst_name, ...}
    bfOut, timeCheck5 = basic_buffer(
        osm_db, osmTableData['lines'], workspace, apidb=roadsAPI)

    for cls in bfOut:
        mergeOut.setdefault(cls, []).append(bfOut[cls])

    time_m = _now()

    # ***** 7 - Assign untagged Buildings to tags ***** #
    if not is_globe:
        buildsOut, timeCheck7 = rst_pnt_to_build(
            osm_db, osmTableData['points'], osmTableData['polygons'],
            api_db=roadsAPI
        )

        # Unlike the other rules, each value of buildsOut is already a
        # list of rasters
        for cls in buildsOut:
            if cls not in mergeOut:
                mergeOut[cls] = buildsOut[cls]
            else:
                mergeOut[cls] += buildsOut[cls]
    else:
        timeCheck7 = None

    time_n = _now()

    # ***** Produce LULC Map ***** #
    # Merge all results for one cls into one raster
    # mergeOut = {cls_code : [rst_name, rst_name, ...], ...}
    # into
    # mergeOut = {cls_code : patched_raster, ...}
    for cls in mergeOut:
        if len(mergeOut[cls]) == 1:
            mergeOut[cls] = mergeOut[cls][0]
        else:
            mergeOut[cls] = rsts_to_mosaic(
                mergeOut[cls], 'mosaic_{}'.format(str(cls)), api="grass")

    time_o = _now()

    # Merge all Class Raster using a priority rule
    __priorities = PRIORITIES[nomenclature]
    lst_rst = [mergeOut[cls] for cls in __priorities if cls in mergeOut]

    outGrs = rsts_to_mosaic(
        lst_rst, os.path.splitext(os.path.basename(lulcRst))[0], api="grass")

    time_p = _now()

    # Check if lulcRst has a valid format
    outIsRst = check_isRaster(lulcRst)
    if not outIsRst:
        lulcRst = os.path.join(
            os.path.dirname(lulcRst), fprop(lulcRst, 'fn') + '.tif')

    grs_to_rst(outGrs, lulcRst, as_cmd=True)
    osmlulc_rsttbl(
        nomenclature,
        os.path.join(
            os.path.dirname(lulcRst), os.path.basename(lulcRst) + '.vat.dbf')
    )

    time_q = _now()

    # Dump Database if PostGIS was used
    # Drop Database if PostGIS was used
    if roadsAPI == 'POSTGIS':
        dump_db(osm_db, os.path.join(workspace, osm_db + '.sql'), api='psql')
        drop_db(osm_db)

    return lulcRst, {
        0: ('set_settings', time_b - time_a),
        1: ('osm_to_sqdb', time_c - time_b),
        2: ('cls_in_sqdb', time_d - time_c),
        3: ('proj_data', time_e - time_d),
        4: ('set_grass', time_f - time_e),
        5: ('rule_1', time_g - time_f, timeCheck1),
        6: ('rule_2', time_h - time_g, timeCheck2),
        7: None if not timeCheck3 else (
            'rule_3', time_l - time_h, timeCheck3),
        8: None if not timeCheck4 else (
            'rule_4', time_j - time_l, timeCheck4),
        9: ('rule_5',
            time_m - time_j if timeCheck4 else time_m - time_h,
            timeCheck5),
        10: None if not timeCheck7 else (
            'rule_7', time_n - time_m, timeCheck7),
        11: ('merge_rst', time_o - time_n),
        12: ('priority_rule', time_p - time_o),
        13: ('export_rst', time_q - time_p)
    }
def osm_to_relationaldb(osmData, inSchema, osmGeoTbl, osmCatTbl, osmRelTbl,
                        outSQL=None, db_name=None):
    """
    PostgreSQL - OSM Data to Relational Model

    Import an OSM file into a PostgreSQL database and derive three kinds of
    tables from it: one geometry table per input table, one table with
    the distinct key/value pairs (categories), and one relation table per
    input table linking features to categories.

    TODO: Just work for one geom table at once

    Parameters:
    * osmData   - path to the OSM file to import;
    * inSchema  - dict describing the imported OSM tables (see example);
                  it is NOT modified by this function;
    * osmGeoTbl - dict keyed by input table name; each value gives the
                  output geometry table name ("TBL") and id column ("FID");
    * osmCatTbl - dict describing the categories table (see example);
    * osmRelTbl - dict keyed by input table name; each value gives the
                  output relation table name ("TBL") and id column ("FID");
    * outSQL    - if given, the created tables are dumped with dump_tbls
                  and its result is returned;
    * db_name   - database name; defaults to the OSM file name.

    Returns (geometry tables, categories table, relation tables) when
    outSQL is not given; otherwise whatever dump_tbls returns.

    E.g.
    osmData = '/home/jasp/flainar/osm_centro.xml'
    inSchema = {
        "TBL" : ['points', 'lines', 'multipolygons'],
        'FID' : 'CAST(osm_id AS bigint)',
        "COLS" : [
            'name',
            "ST_X(wkb_geometry) AS longitude",
            "ST_Y(wkb_geometry) AS latitude",
            "wkb_geometry AS geom",
            "NULL AS featurecategoryid",
            "NULL AS flainarcategoryid",
            "NULL AS createdby",
            "NOW() AS createdon",
            "NULL AS updatedon",
            "NULL AS deletedon"
        ],
        "NOT_KEYS" : [
            'ogc_fid', 'osm_id', 'name', "wkb_geometry",
            'healthcare2', 'other_tags'
        ]
    }

    osmGeoTbl = {
        "points" : {"TBL" : 'position', "FID" : 'positionid'}
    }

    osmCatTbl = {
        "TBL" : 'osmcategory', "FID" : "osmcategoryid",
        "KEY_COL" : "keycategory", "VAL_COL" : "value",
        "COLS" : [
            "NULL AS createdby", "NOW() AS createdon",
            "NULL AS updatedon", "NULL AS deletedon"
        ]
    }

    osmRelTbl = {
        "points" : {"TBL" : "position_osmcat", "FID" : 'pososmcatid'}
    }
    """

    from gasp.pyt import obj_to_lst
    from gasp.pyt.oss import fprop
    from gasp.sql.i import cols_name
    from gasp.sql.to import q_to_ntbl
    from gasp.sql.db import create_db

    # Work on a local list: the caller's inSchema must not be altered
    # (presumably obj_to_lst wraps a single name in a list - TODO confirm)
    tables = obj_to_lst(inSchema["TBL"])

    # Create DB
    db = create_db(fprop(osmData, 'fn') if not db_name else db_name, api='psql')

    # Send OSM data to Database
    osm_to_psql(osmData, db)

    # Get KEYS COLUMNS: every column not listed in NOT_KEYS is handled
    # as an OSM key
    transcols = {
        tbl: [
            c for c in cols_name(db, tbl, sanitizeSpecialWords=None)
            if c not in inSchema["NOT_KEYS"]
        ] for tbl in tables
    }

    # Create osmGeoTbl
    osmgeotbl = [q_to_ntbl(
        db, osmGeoTbl[tbl]['TBL'],
        "SELECT {} AS {}, {} FROM {}".format(
            inSchema["FID"], osmGeoTbl[tbl]["FID"],
            ", ".join(inSchema["COLS"]), tbl
        ), api='psql'
    ) for tbl in tables]

    # Create OSM categories table
    qs = []
    for tbl in tables:
        qs.extend([(
            "SELECT '{keyV}' AS {keyC}, CAST({t}.{keyV} AS text) AS {valC} "
            "FROM {t} WHERE {t}.{keyV} IS NOT NULL "
            "GROUP BY {t}.{keyV}"
        ).format(
            keyV=c, t=tbl,
            keyC=osmCatTbl["KEY_COL"], valC=osmCatTbl["VAL_COL"]
        ) for c in transcols[tbl]])

    all_pairs = " UNION ALL ".join(qs)

    # With several tables the same key/value pair may show up more than
    # once, so the union is grouped again
    if len(tables) > 1:
        pairs_q = "SELECT {k}, {v} FROM ({t}) AS kvtbl GROUP BY {k}, {v}".format(
            k=osmCatTbl["KEY_COL"], v=osmCatTbl["VAL_COL"], t=all_pairs
        )
    else:
        pairs_q = all_pairs

    if "COLS" not in osmCatTbl:
        other_cols = ""
    else:
        other_cols = ", {}".format(", ".join(osmCatTbl["COLS"]))

    osmcatbl = q_to_ntbl(db, osmCatTbl["TBL"], (
        "SELECT row_number() OVER(ORDER BY {keyC}) "
        "AS {osmcatid}, {keyC}, {valC}{ocols} "
        "FROM ({q}) AS foo"
    ).format(
        q=pairs_q, keyC=osmCatTbl["KEY_COL"],
        osmcatid=osmCatTbl["FID"], valC=osmCatTbl["VAL_COL"],
        ocols=other_cols
    ), api='psql')

    # Create relation table
    # inSchema["FID"] may be an SQL expression (e.g. a CAST), which can not
    # be referenced as foo.<expression> in the outer query; the inner
    # queries expose it under a fixed alias instead.
    fid_alias = "src_fid"

    osmreltbl = []
    for tbl in tables:
        key_qs = [(
            "SELECT {fid} AS {alias}, '{keyV}' AS key, "
            "CAST({t}.{keyV} AS text) AS osmval "
            "FROM {t} WHERE {t}.{keyV} IS NOT NULL"
        ).format(
            fid=inSchema["FID"], alias=fid_alias, keyV=c, t=tbl
        ) for c in transcols[tbl]]

        osmreltbl.append(q_to_ntbl(db, osmRelTbl[tbl]["TBL"], (
            "SELECT foo.{alias} AS {nfid}, catbl.{osmcatfid} "
            "FROM ({mtbl}) AS foo INNER JOIN {catTbl} AS catbl "
            "ON foo.key = catbl.{catkey} AND foo.osmval = catbl.{catval}"
        ).format(
            mtbl=" UNION ALL ".join(key_qs), alias=fid_alias,
            nfid=osmRelTbl[tbl]["FID"],
            catTbl=osmCatTbl["TBL"], osmcatfid=osmCatTbl["FID"],
            catkey=osmCatTbl["KEY_COL"], catval=osmCatTbl["VAL_COL"]
        ), api='psql'))

    if not outSQL:
        return osmgeotbl, osmcatbl, osmreltbl

    from gasp.sql.fm import dump_tbls

    return dump_tbls(db, osmgeotbl + [osmcatbl] + osmreltbl, outSQL)
def proj(inShp, outShp, outEPSG, inEPSG=None,
         gisApi='ogr', sql=None, db_name=None):
    """
    Project Geodata using GIS

    API's Available:
    * ogr;
    * ogr2ogr;
    * pandas;
    * ogr2ogr_SQLITE;
    * psql;

    Parameters:
    * inShp   - input data; for ogr2ogr_SQLITE it is a dict with the keys
                'DB' and 'TABLE';
    * outShp  - output file; for ogr2ogr_SQLITE it is the new table name;
    * outEPSG - EPSG code of the output data;
    * inEPSG  - EPSG code of the input data. Mandatory for ogr and
                ogr2ogr_SQLITE; for ogr2ogr it is read from inShp
                when not given;
    * sql     - optional SQL statement (ogr2ogr and ogr2ogr_SQLITE only);
    * db_name - database to be used by the psql API; when not given, a
                database named after outShp is created.

    Returns outShp (the pandas and psql branches return whatever the
    export helper returns).

    Raises ValueError if a mandatory inEPSG is missing or if gisApi
    is not one of the available API's.
    """

    if gisApi == 'ogr':
        """
        Using ogr Python API
        """

        if not inEPSG:
            raise ValueError(
                'To use ogr API, you should specify the EPSG Code of the'
                ' input data using inEPSG parameter'
            )

        from osgeo import ogr
        from gasp.g.lyr.fld import copy_flds
        from gasp.gt.prop.feat import get_gtype
        from gasp.gt.prop.ff import drv_name
        from gasp.gt.prop.prj import get_sref_from_epsg, get_trans_param
        from gasp.pyt.oss import fprop

        def copyShp(out, outDefn, lyr_in, trans):
            # Reproject each feature geometry and copy its attributes
            for f in lyr_in:
                g = f.GetGeometryRef()
                g.Transform(trans)
                new = ogr.Feature(outDefn)
                new.SetGeometry(g)
                for i in range(0, outDefn.GetFieldCount()):
                    new.SetField(
                        outDefn.GetFieldDefn(i).GetNameRef(), f.GetField(i)
                    )
                out.CreateFeature(new)
                new.Destroy()
                f.Destroy()

        # ####### #
        # Project #
        # ####### #
        transP = get_trans_param(inEPSG, outEPSG)

        inData = ogr.GetDriverByName(drv_name(inShp)).Open(inShp, 0)
        inLyr = inData.GetLayer()

        out = ogr.GetDriverByName(drv_name(outShp)).CreateDataSource(outShp)
        outlyr = out.CreateLayer(
            fprop(outShp, 'fn'), get_sref_from_epsg(outEPSG),
            geom_type=get_gtype(
                inShp, name=None, py_cls=True, gisApi='ogr'
            )
        )

        # Copy fields to the output
        copy_flds(inLyr, outlyr)

        # Copy/transform features from the input to the output
        outlyrDefn = outlyr.GetLayerDefn()
        copyShp(outlyr, outlyrDefn, inLyr, transP)

        inData.Destroy()
        out.Destroy()

    elif gisApi == 'ogr2ogr':
        """
        Transform SRS of any OGR Compilant Data. Save the transformed data
        in a new file
        """

        if not inEPSG:
            from gasp.gt.prop.prj import get_epsg_shp
            inEPSG = get_epsg_shp(inShp)

        if not inEPSG:
            raise ValueError('To use ogr2ogr, you must specify inEPSG')

        from gasp import exec_cmd
        from gasp.gt.prop.ff import drv_name

        cmd = (
            'ogr2ogr -f "{}" {} {}{} -s_srs EPSG:{} -t_srs EPSG:{}'
        ).format(
            drv_name(outShp), outShp, inShp,
            '' if not sql else ' -dialect sqlite -sql "{}"'.format(sql),
            str(inEPSG), str(outEPSG)
        )

        exec_cmd(cmd)

    elif gisApi == 'ogr2ogr_SQLITE':
        """
        Transform SRS of a SQLITE DB table. Save the transformed data in a
        new table
        """

        from gasp import exec_cmd

        if not inEPSG:
            raise ValueError((
                'With ogr2ogr_SQLITE, the definition of inEPSG is '
                'demandatory.'
            ))

        # TODO: Verify if database is sqlite

        db, tbl = inShp['DB'], inShp['TABLE']
        sql = 'SELECT * FROM {}'.format(tbl) if not sql else sql

        exec_cmd((
            'ogr2ogr -update -append -f "SQLite" {db} -nln "{nt}" '
            '-dialect sqlite -sql "{_sql}" -s_srs EPSG:{inepsg} '
            '-t_srs EPSG:{outepsg} {db}'
        ).format(
            db=db, nt=outShp, _sql=sql,
            inepsg=str(inEPSG), outepsg=str(outEPSG)
        ))

    elif gisApi == 'pandas':
        # Test if input Shp is GeoDataframe
        from gasp.gt.fmshp import shp_to_obj
        from gasp.gt.toshp import df_to_shp

        df = shp_to_obj(inShp)

        # Project df
        newDf = df.to_crs({'init' : 'epsg:{}'.format(str(outEPSG))})

        # Save as file - the projected copy must be written, to_crs does
        # not change df in place
        return df_to_shp(newDf, outShp)

    elif gisApi == 'psql':
        from gasp.sql.db import create_db
        from gasp.pyt.oss import fprop
        from gasp.gql.to import shp_to_psql
        from gasp.gt.toshp.db import dbtbl_to_shp
        from gasp.gql.prj import sql_proj

        # Create Database
        if not db_name:
            db_name = create_db(
                fprop(outShp, 'fn', forceLower=True), api='psql'
            )

        else:
            from gasp.sql.i import db_exists

            if not db_exists(db_name):
                create_db(db_name, api='psql')

        # Import Data
        inTbl = shp_to_psql(
            db_name, inShp, api='shp2pgsql', encoding="LATIN1")

        # Transform
        oTbl = sql_proj(
            db_name, inTbl, fprop(outShp, 'fn', forceLower=True),
            outEPSG, geomCol='geom', newGeom='geom'
        )

        # Export
        outShp = dbtbl_to_shp(
            db_name, oTbl, 'geom', outShp, api='psql', epsg=outEPSG
        )

    else:
        raise ValueError('Sorry, API {} is not available'.format(gisApi))

    return outShp
def tbl_to_areamtx(inShp, col_a, col_b, outXls, db=None, with_metrics=None):
    """
    Feature Class table to area matrix

    Cross two columns of a Feature Class and write a square matrix where
    each cell holds the summed area of the features having a given
    value in col_a (matrix rows) and a given value in col_b (matrix cols).

    From a table as:
    FID | col_a | col_b | geom
      1 |   A   |   A   | ....
      2 |   A   |   B   | ....
      3 |   B   |   A   | ....
      4 |   C   |   D   | ....

    To:
    class | A | B | C | D
      A   |   |   |   |
      B   |   |   |   |
      C   |   |   |   |
      D   |   |   |   |

    The work is done with pandas when db is not given; otherwise
    the data is sent to the PostgreSQL database db and the matrix is
    produced there.

    with_metrics is only considered by the pandas implementation: the
    matrix goes through get_measures_for_mtx before being exported.
    """

    if db:
        from gasp.pyt.oss import fprop
        from gasp.sql.db import create_db
        from gasp.sql.i import db_exists
        from gasp.gql.to import shp_to_psql
        from gasp.gql.tomtx import tbl_to_area_mtx
        from gasp.to import db_to_tbl

        # The database is created only if it is missing
        if not db_exists(db):
            create_db(db, api='psql')

        # Feature Class to database
        pg_tbl = shp_to_psql(db, inShp, api='shp2pgsql')

        # Matrix as database table
        mtx_tbl = tbl_to_area_mtx(
            db, pg_tbl, col_a, col_b, fprop(outXls, 'fn'))

        # Database table to file
        return db_to_tbl(db, mtx_tbl, outXls, sheetsNames='matrix')

    import pandas as pd
    import numpy as np
    from gasp.gt.fmshp import shp_to_obj
    from gasp.to import obj_to_tbl

    gdf = shp_to_obj(inShp)

    # Rows without a value in one of the columns are left out
    gdf = gdf[pd.notnull(gdf[col_a])]
    gdf = gdf[pd.notnull(gdf[col_b])]

    # Area divided by 1 000 000
    # (presumably square meters to square kilometers - TODO confirm units)
    gdf['realarea'] = gdf.geometry.area / 1000000

    # Classes are the sorted union of the values found in both columns
    classes = list(np.sort(np.unique(
        np.append(gdf[col_a].unique(), gdf[col_b].unique())
    )))

    def cell_area(row_cls, col_cls):
        # Area of the features matching both classes; 0 if there is none
        match = gdf[(gdf[col_a] == row_cls) & (gdf[col_b] == col_cls)]

        return match.realarea.sum() if match.shape[0] else 0

    mtx = pd.DataFrame(
        [[r] + [cell_area(r, c) for c in classes] for r in classes],
        columns=['class'] + classes
    )

    if with_metrics:
        from gasp.pyt.dtcls.eval import get_measures_for_mtx

        return obj_to_tbl(get_measures_for_mtx(mtx, 'class'), outXls)

    # Export to Excel
    return obj_to_tbl(mtx, outXls)
def shps_to_shp(shps, outShp, api="ogr2ogr", fformat='.shp', dbname=None):
    """
    Get all features in several Shapefiles and save them in one file

    api options:
    * ogr2ogr;
    * pandas;
    * psql;
    * grass;

    Parameters:
    * shps    - list with paths to Feature Classes or path to a folder
                with Feature Classes;
    * outShp  - path to the output file (or output name for psql/grass);
    * fformat - file format of the files to be listed when shps is a folder;
    * dbname  - database to be used by the psql API; when not given, a
                database named after outShp is created.

    Returns outShp.

    Raises ValueError if shps is neither a list nor a folder or if api
    is not one of the available options.
    """

    import os

    if not isinstance(shps, list):
        # Check if is dir
        if os.path.isdir(shps):
            from gasp.pyt.oss import lst_ff

            # List shps in dir
            shps = lst_ff(shps, file_format=fformat)

        else:
            raise ValueError((
                'shps should be a list with paths for Feature Classes or a path to '
                'folder with Feature Classes'))

    if api == "ogr2ogr":
        from gasp import exec_cmd
        from gasp.gt.prop.ff import drv_name

        out_drv = drv_name(outShp)

        # Create output and copy some features of one layer (first in shps)
        exec_cmd('ogr2ogr -f "{}" {} {}'.format(out_drv, outShp, shps[0]))

        # Append remaining layers
        for shp in shps[1:]:
            exec_cmd('ogr2ogr -f "{}" -update -append {} {}'.format(
                out_drv, outShp, shp))

    elif api == 'pandas':
        """
        Merge SHP using pandas
        """

        import pandas as pd
        from gasp.gt.fmshp import shp_to_obj
        from gasp.gt.toshp import df_to_shp

        dfs = [shp_to_obj(shp) for shp in shps]

        # DataFrame.append does not exist in pandas >= 2.0; concat gives
        # the same merged table. A single input is written as it is.
        if len(dfs) == 1:
            result = dfs[0]
        else:
            result = pd.concat(dfs, ignore_index=True, sort=True)

        df_to_shp(result, outShp)

    elif api == 'psql':
        from gasp.pyt.oss import fprop
        from gasp.sql.tbl import tbls_to_tbl, del_tables
        from gasp.gql.to import shp_to_psql

        if not dbname:
            # No database given: create one named after the output
            from gasp.sql.db import create_db

            dbname = create_db(
                fprop(outShp, 'fn', forceLower=True), api='psql')

        pg_tbls = shp_to_psql(dbname, shps, api="shp2pgsql")

        # NOTE(review): outShp is only handled as a file if it already
        # exists on disk; a path to a new file is used as table name and
        # is not exported - confirm if this is intended
        if os.path.isfile(outShp):
            outbl = fprop(outShp, 'fn')
        else:
            outbl = outShp

        tbls_to_tbl(dbname, pg_tbls, outbl)

        if outbl != outShp:
            from gasp.gt.toshp.db import dbtbl_to_shp

            dbtbl_to_shp(
                dbname, outbl, 'geom', outShp, inDB='psql',
                api="pgsql2shp"
            )

        del_tables(dbname, pg_tbls)

    elif api == 'grass':
        from gasp import exec_cmd

        exec_cmd(
            "v.patch input={} output={} --overwrite --quiet".format(
                ",".join(shps), outShp))

    else:
        raise ValueError("{} API is not available".format(api))

    return outShp
def osm2lulc(osmdata, nomenclature, refRaster, lulcRst, overwrite=None,
             dataStore=None, roadsAPI='POSTGIS'):
    """
    Convert OSM data into Land Use/Land Cover Information

    A matrix based approach

    roadsAPI Options:
    * SQLITE
    * POSTGIS

    Parameters:
    * osmdata      - path to the file with OSM data;
    * nomenclature - URBAN_ATLAS, CORINE_LAND_COVER or GLOBE_LAND_30; any
                     other value is replaced by URBAN_ATLAS;
    * refRaster    - path to the file with the reference area;
    * lulcRst      - path to the output raster; its folder must exist;
    * overwrite    - if the workspace already exists, it is only reused
                     when overwrite is true;
    * dataStore    - workspace for temporary data; default is the folder
                     'num_osmto' next to lulcRst.

    Returns a tuple with the path to the output raster and a dict with
    the time spent in each step. If no rule produces a raster for any
    class, the string 'NoResults' is returned instead.

    Raises ValueError if one of the input paths does not exist or if the
    workspace exists and overwrite is not set.
    """

    # ************************************************************************ #
    # Python Modules from Reference Packages #
    # ************************************************************************ #
    import os
    import numpy
    import datetime
    from threading import Thread
    from osgeo import gdal

    # ************************************************************************ #
    # Dependencies #
    # ************************************************************************ #
    from gasp.gt.fmrst import rst_to_array
    from gasp.gt.prop.ff import check_isRaster
    from gasp.gt.prop.rst import get_cellsize
    from gasp.gt.prop.prj import get_rst_epsg
    from gasp.pyt.oss import mkdir, copy_file
    from gasp.pyt.oss import fprop

    if roadsAPI == 'POSTGIS':
        from gasp.sql.db import create_db
        from gasp.gql.to.osm import osm_to_psql
        from gasp.sds.osm2lulc.mod2 import pg_num_roads
        from gasp.sql.fm import dump_db
        from gasp.sql.db import drop_db
    else:
        from gasp.gt.toshp.osm import osm_to_sqdb
        from gasp.sds.osm2lulc.mod2 import num_roads

    from gasp.sds.osm2lulc.utils import osm_project, add_lulc_to_osmfeat
    from gasp.sds.osm2lulc.utils import osmlulc_rsttbl
    from gasp.sds.osm2lulc.utils import get_ref_raster
    from gasp.sds.osm2lulc.mod1 import num_selection
    from gasp.sds.osm2lulc.m3_4 import num_selbyarea
    from gasp.sds.osm2lulc.mod5 import num_base_buffer
    from gasp.sds.osm2lulc.mod6 import num_assign_builds
    from gasp.gt.torst import obj_to_rst

    # ************************************************************************ #
    # Global Settings #
    # ************************************************************************ #
    # Check if input parameters exists!
    if not os.path.exists(os.path.dirname(lulcRst)):
        raise ValueError('{} does not exist!'.format(os.path.dirname(lulcRst)))

    if not os.path.exists(osmdata):
        raise ValueError(
            'File with OSM DATA ({}) does not exist!'.format(osmdata))

    if not os.path.exists(refRaster):
        raise ValueError(
            'File with reference area ({}) does not exist!'.format(refRaster))

    # Check if Nomenclature is valid
    # Unknown nomenclatures fall back to URBAN_ATLAS; the three known ones
    # (GLOBE_LAND_30 included) are kept as given
    if nomenclature not in (
        "URBAN_ATLAS", "CORINE_LAND_COVER", "GLOBE_LAND_30"
    ):
        nomenclature = "URBAN_ATLAS"

    time_a = datetime.datetime.now().replace(microsecond=0)

    workspace = os.path.join(os.path.dirname(
        lulcRst), 'num_osmto') if not dataStore else dataStore

    # Check if workspace exists:
    if os.path.exists(workspace):
        if overwrite:
            mkdir(workspace, overwrite=True)
        else:
            raise ValueError('Path {} already exists'.format(workspace))
    else:
        mkdir(workspace, overwrite=None)

    # Get Ref Raster and EPSG
    refRaster, epsg = get_ref_raster(refRaster, workspace, cellsize=2)
    CELLSIZE = get_cellsize(refRaster, gisApi='gdal')

    from gasp.sds.osm2lulc import osmTableData, PRIORITIES

    time_b = datetime.datetime.now().replace(microsecond=0)

    # ************************************************************************ #
    # Convert OSM file to SQLITE DB or to POSTGIS DB #
    # ************************************************************************ #
    if roadsAPI == 'POSTGIS':
        osm_db = create_db(fprop(
            osmdata, 'fn', forceLower=True), overwrite=True)
        osm_db = osm_to_psql(osmdata, osm_db)
    else:
        osm_db = osm_to_sqdb(osmdata, os.path.join(workspace, 'osm.sqlite'))

    time_c = datetime.datetime.now().replace(microsecond=0)

    # ************************************************************************ #
    # Add Lulc Classes to OSM_FEATURES by rule #
    # ************************************************************************ #
    add_lulc_to_osmfeat(osm_db, osmTableData, nomenclature, api=roadsAPI)

    time_d = datetime.datetime.now().replace(microsecond=0)

    # ************************************************************************ #
    # Transform SRS of OSM Data #
    # ************************************************************************ #
    osmTableData = osm_project(
        osm_db, epsg, api=roadsAPI,
        isGlobeLand=None if nomenclature != "GLOBE_LAND_30" else True
    )

    time_e = datetime.datetime.now().replace(microsecond=0)

    # ************************************************************************ #
    # MapResults #
    # ************************************************************************ #
    mergeOut = {}
    timeCheck = {}
    RULES = [1, 2, 3, 4, 5, 7]

    def run_rule(ruleID):
        """
        Run one rule and store its rasters in mergeOut and its timings in
        timeCheck. Rules 3, 4 and 7 store nothing for GLOBE_LAND_30.
        """

        time_start = datetime.datetime.now().replace(microsecond=0)

        # With SQLITE, each rule works on its own copy of the database
        _osmdb = copy_file(
            osm_db, os.path.splitext(osm_db)[0] + '_r{}.sqlite'.format(ruleID)
        ) if roadsAPI == 'SQLITE' else None

        # ******************************************************************** #
        # 1 - Selection Rule #
        # ******************************************************************** #
        if ruleID == 1:
            res, tm = num_selection(
                _osmdb if _osmdb else osm_db, osmTableData['polygons'],
                workspace, CELLSIZE, epsg, refRaster, api=roadsAPI
            )

        # ******************************************************************** #
        # 2 - Get Information About Roads Location #
        # ******************************************************************** #
        elif ruleID == 2:
            res, tm = num_roads(
                _osmdb, nomenclature, osmTableData['lines'],
                osmTableData['polygons'], workspace, CELLSIZE, epsg,
                refRaster
            ) if _osmdb else pg_num_roads(
                osm_db, nomenclature, osmTableData['lines'],
                osmTableData['polygons'], workspace, CELLSIZE, epsg,
                refRaster
            )

        # ******************************************************************** #
        # 3 - Area Upper than #
        # ******************************************************************** #
        elif ruleID == 3:
            if nomenclature != "GLOBE_LAND_30":
                res, tm = num_selbyarea(
                    osm_db if not _osmdb else _osmdb,
                    osmTableData['polygons'], workspace, CELLSIZE, epsg,
                    refRaster, UPPER=True, api=roadsAPI
                )
            else:
                return

        # ******************************************************************** #
        # 4 - Area Lower than #
        # ******************************************************************** #
        elif ruleID == 4:
            if nomenclature != "GLOBE_LAND_30":
                res, tm = num_selbyarea(
                    osm_db if not _osmdb else _osmdb,
                    osmTableData['polygons'], workspace, CELLSIZE, epsg,
                    refRaster, UPPER=False, api=roadsAPI
                )
            else:
                return

        # ******************************************************************** #
        # 5 - Get data from lines table (railway | waterway) #
        # ******************************************************************** #
        elif ruleID == 5:
            res, tm = num_base_buffer(
                osm_db if not _osmdb else _osmdb,
                osmTableData['lines'], workspace, CELLSIZE, epsg,
                refRaster, api=roadsAPI
            )

        # ******************************************************************** #
        # 7 - Assign untagged Buildings to tags #
        # ******************************************************************** #
        elif ruleID == 7:
            if nomenclature != "GLOBE_LAND_30":
                res, tm = num_assign_builds(
                    osm_db if not _osmdb else _osmdb,
                    osmTableData['points'], osmTableData['polygons'],
                    workspace, CELLSIZE, epsg, refRaster, apidb=roadsAPI
                )
            else:
                return

        time_end = datetime.datetime.now().replace(microsecond=0)

        mergeOut[ruleID] = res
        timeCheck[ruleID] = {'total': time_end - time_start, 'detailed': tm}

    # One thread per rule
    # NOTE(review): an exception raised inside a thread is not propagated;
    # the missing rule only shows up as a KeyError when the timings of
    # rules 1, 2 or 5 are read at the end of this function
    thrds = [Thread(
        name="to_{}".format(str(r)), target=run_rule, args=(r,)
    ) for r in RULES]

    for t in thrds:
        t.start()

    for t in thrds:
        t.join()

    # Merge all results into one Raster
    # compileResults = {cls : [raster, raster, ...], ...}
    compileResults = {}
    for rule in mergeOut:
        for cls in mergeOut[rule]:
            cls_rsts = compileResults.setdefault(cls, [])

            if isinstance(mergeOut[rule][cls], list):
                cls_rsts.extend(mergeOut[rule][cls])
            else:
                cls_rsts.append(mergeOut[rule][cls])

    time_m = datetime.datetime.now().replace(microsecond=0)

    # All Rasters to Array
    arrayRst = {}
    for cls in compileResults:
        for raster in compileResults[cls]:
            if not raster:
                continue

            array = rst_to_array(raster)

            if cls not in arrayRst:
                arrayRst[cls] = [array.astype(numpy.uint8)]
            else:
                arrayRst[cls].append(array.astype(numpy.uint8))

    time_n = datetime.datetime.now().replace(microsecond=0)

    # Sum Rasters of each class
    for cls in arrayRst:
        sumArray = arrayRst[cls][0]

        for cls_array in arrayRst[cls][1:]:
            sumArray = sumArray + cls_array

        arrayRst[cls] = sumArray

    time_o = datetime.datetime.now().replace(microsecond=0)

    # Apply priority rule
    # NOTE(review): rstcls_map is not imported in this function; it is
    # expected to be available at module level - confirm
    priorities = PRIORITIES[nomenclature + "_NUMPY"]

    # Cells with data get the code of their class
    for lulcCls in priorities:
        cls_key = rstcls_map(lulcCls)

        if cls_key not in arrayRst:
            continue

        numpy.place(arrayRst[cls_key], arrayRst[cls_key] > 0, lulcCls)

    # Cells taken by one class are erased from every class that comes
    # after it in the priorities list
    for i in range(len(priorities)):
        lulc_i = rstcls_map(priorities[i])

        if lulc_i not in arrayRst:
            continue

        for e in range(i + 1, len(priorities)):
            lulc_e = rstcls_map(priorities[e])

            if lulc_e not in arrayRst:
                continue

            numpy.place(
                arrayRst[lulc_e], arrayRst[lulc_i] == priorities[i], 0
            )

    time_p = datetime.datetime.now().replace(microsecond=0)

    # Merge all rasters
    # Start with the first class in priorities that has a raster
    startCls = 'None'
    for i in range(len(priorities)):
        lulc_i = rstcls_map(priorities[i])

        if lulc_i in arrayRst:
            resultSum = arrayRst[lulc_i]
            startCls = i
            break

    if startCls == 'None':
        return 'NoResults'

    for i in range(startCls + 1, len(priorities)):
        lulc_i = rstcls_map(priorities[i])

        if lulc_i not in arrayRst:
            continue

        resultSum = resultSum + arrayRst[lulc_i]

    # Save Result
    outIsRst = check_isRaster(lulcRst)
    if not outIsRst:
        lulcRst = os.path.join(
            os.path.dirname(lulcRst), fprop(lulcRst, 'fn') + '.tif'
        )

    # Cells without any class get the value 1, which is the NoData value
    numpy.place(resultSum, resultSum == 0, 1)
    obj_to_rst(resultSum, lulcRst, refRaster, noData=1)

    osmlulc_rsttbl(nomenclature + "_NUMPY", os.path.join(
        os.path.dirname(lulcRst), os.path.basename(lulcRst) + '.vat.dbf'
    ))

    time_q = datetime.datetime.now().replace(microsecond=0)

    # Dump Database if PostGIS was used
    # Drop Database if PostGIS was used
    if roadsAPI == 'POSTGIS':
        dump_db(osm_db, os.path.join(workspace, osm_db + '.sql'), api='psql')
        drop_db(osm_db)

    return lulcRst, {
        0  : ('set_settings', time_b - time_a),
        1  : ('osm_to_sqdb', time_c - time_b),
        2  : ('cls_in_sqdb', time_d - time_c),
        3  : ('proj_data', time_e - time_d),
        4  : ('rule_1', timeCheck[1]['total'], timeCheck[1]['detailed']),
        5  : ('rule_2', timeCheck[2]['total'], timeCheck[2]['detailed']),
        6  : None if 3 not in timeCheck else (
            'rule_3', timeCheck[3]['total'], timeCheck[3]['detailed']),
        7  : None if 4 not in timeCheck else (
            'rule_4', timeCheck[4]['total'], timeCheck[4]['detailed']),
        8  : ('rule_5', timeCheck[5]['total'], timeCheck[5]['detailed']),
        9  : None if 7 not in timeCheck else (
            'rule_7', timeCheck[7]['total'], timeCheck[7]['detailed']),
        10 : ('rst_to_array', time_n - time_m),
        11 : ('sum_cls', time_o - time_n),
        12 : ('priority_rule', time_p - time_o),
        13 : ('merge_rst', time_q - time_p)
    }
def v_break_at_points(workspace, loc, lineShp, pntShp, db, srs,
                      out_correct, out_tocorrect):
    """
    Break lines at points - Based on GRASS GIS v.edit

    Use PostGIS to sanitize the result

    The lines are broken inside a GRASS GIS session, exported to file and
    sent to the database db (which is created with overwrite). Two
    Feature Classes are written from there:
    * out_correct   - lines of the categories with 1, 2 or 3 geometries;
    * out_tocorrect - lines of the categories with more than 3 geometries,
                      to be solved by the user case by case.

    Nothing is returned.

    TODO: Confirm utility

    Problem:
    GRASS GIS always uses the first line to break.
    """

    import os
    from gasp.gql.to import shp_to_psql
    from gasp.gt.toshp.db import dbtbl_to_shp
    from gasp.gt.wenv.grs import run_grass
    from gasp.pyt.oss import fprop
    from gasp.sql.db import create_db
    from gasp.sql.to import q_to_ntbl

    tmp_dir = os.path.join(workspace, loc)

    # Start GRASS GIS Session
    grs_base = run_grass(workspace, location=loc, srs=srs)

    import grass.script as grass
    import grass.script.setup as gsetup

    gsetup.init(grs_base, workspace, loc, 'PERMANENT')

    from gasp.gt.toshp.cff import shp_to_grs, grs_to_shp

    # Break the lines in GRASS GIS and export them
    # NOTE(review): vedit_break is not imported in this function; it is
    # expected to be available at module level - confirm
    line_name = fprop(lineShp, 'fn', forceLower=True)
    grs_lines = shp_to_grs(lineShp, line_name)

    vedit_break(grs_lines, pntShp, geomType='line')

    broken_shp = grs_to_shp(
        grs_lines, os.path.join(tmp_dir, grs_lines + '_v1.shp'), 'line')

    # Sanitize output of v.edit.break using PostGIS
    create_db(db, overwrite=True, api='psql')

    lines_tbl = shp_to_psql(
        db, broken_shp, srsEpsgCode=srs,
        pgTable=fprop(broken_shp, 'fn', forceLower=True),
        api="shp2pgsql"
    )

    # Delete old/original lines and stay only with the broken ones
    # NOTE(review): ST_Length(geom) <= max_len is true for every row of a
    # category, the longest (original?) line included - confirm if a
    # strict < was intended
    q_correct = (
        "SELECT {t}.*, foo.cat_count FROM {t} INNER JOIN ("
            "SELECT cat, COUNT(cat) AS cat_count, "
            "MAX(ST_Length(geom)) AS max_len "
            "FROM {t} GROUP BY cat"
        ") AS foo ON {t}.cat = foo.cat "
        "WHERE foo.cat_count = 1 OR foo.cat_count = 2 OR ("
            "foo.cat_count = 3 AND ST_Length({t}.geom) <= foo.max_len)"
    ).format(t=lines_tbl)

    correct_tbl = q_to_ntbl(
        db, "{}_corrected".format(lines_tbl), q_correct, api='psql')

    # TODO: Delete Rows that have exactly the same geometry

    # Highlight problems that the user must solve case by case
    q_problems = (
        "SELECT {t}.*, foo.cat_count FROM {t} INNER JOIN ("
            "SELECT cat, COUNT(cat) AS cat_count FROM {t} GROUP BY cat"
        ") AS foo ON {t}.cat = foo.cat "
        "WHERE foo.cat_count > 3"
    ).format(t=lines_tbl)

    problems_tbl = q_to_ntbl(
        db, "{}_not_corr".format(lines_tbl), q_problems, api='psql')

    # Export both tables
    dbtbl_to_shp(db, correct_tbl, "geom", out_correct, api="pgsql2shp")
    dbtbl_to_shp(db, problems_tbl, "geom", out_tocorrect, api="pgsql2shp")