def split_colval_into_cols(db_name, table, column, splitChar, new_cols,
                           new_table):
    """
    Split column value into several columns
    """
    
    from gasp.sql.i  import cols_name
    from gasp.sql.to import q_to_ntbl
    
    if type(new_cols) != list:
        raise ValueError('new_cols should be a list')
    
    nr_cols = len(new_cols)
    
    if nr_cols < 2:
        raise ValueError('new_cols should have 2 or more elements')
    
    # Get column names of table
    tblCols = cols_name(db_name, table)
    
    # SQL construction
    SQL = "SELECT {}, {} FROM {}".format(
        ", ".join(tblCols),
        ", ".join([
            "split_part({}, '{}', {}) AS {}".format(
                column, splitChar, i + 1, new_cols[i]
            ) for i in range(len(new_cols))
        ]),
        table
    )
    
    q_to_ntbl(db_name, new_table, SQL, api='psql')
    
    return new_table
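# Example (a sketch with hypothetical database/table names; assumes a
# PostgreSQL DB 'gisdb' with a table 'roads' whose 'ref' column holds
# values such as 'N-109'):
#
#     split_colval_into_cols(
#         'gisdb', 'roads', 'ref', '-', ['ref_class', 'ref_num'], 'roads_split'
#     )
#
# materialises a query equivalent to:
#
#     SELECT <all roads columns>,
#            split_part(ref, '-', 1) AS ref_class,
#            split_part(ref, '-', 2) AS ref_num
#     FROM roads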
def split_table_by_col_distinct(db, tbl, col):
    """
    Create a new table for each distinct value in one column
    """
    
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i  import cols_type
    from gasp.sql.to import q_to_ntbl
    
    fields_types = cols_type(db, tbl)
    
    # Get unique values
    VALUES = q_to_obj(db,
        "SELECT {col} FROM {t} GROUP BY {col}".format(col=col, t=tbl),
        db_api='psql'
    )[col].tolist()
    
    whr = '{}=\'{}\'' if fields_types[col] == str else '{}={}'
    
    for val in VALUES:
        q_to_ntbl(
            db, '{}_{}'.format(tbl, str(val)),
            "SELECT * FROM {} WHERE {}".format(
                tbl, whr.format(col, str(val))
            ), api='psql'
        )
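# Example (a sketch with hypothetical names; assumes a PostgreSQL DB 'gisdb'
# with a table 'parcels' that has a text column 'district'):
#
#     split_table_by_col_distinct('gisdb', 'parcels', 'district')
#
# For every distinct value of 'district' (e.g. 'north', 'south') a table is
# created ('parcels_north', 'parcels_south', ...) holding only the rows with
# that value, via "SELECT * FROM parcels WHERE district='north'".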
def splite_buffer(db, table, dist, geomField, outTbl,
                  cols_select=None, bufferField="geometry",
                  whrClause=None, outTblIsFile=None, dissolve=None):
    """
    Run ST_Buffer
    
    If dissolve is None, no generalization will be applied;
    if dissolve is a str or a list, the features will be dissolved using the
    fields referenced by that object;
    if dissolve == 'ALL', all features will be dissolved into one.
    """
    
    from gasp.pyt import obj_to_lst
    
    dissolve = obj_to_lst(dissolve) if dissolve != "ALL" else "ALL"
    
    sql = (
        "SELECT{sel}{spFunc}{geom}, {_dist}{endFunc} AS {bf} "
        "FROM {tbl}{whr}{grpBy}"
    ).format(
        sel=" " if not cols_select else " {}, ".format(
            ", ".join(obj_to_lst(cols_select))
        ),
        tbl=table, geom=geomField, _dist=str(dist), bf=bufferField,
        whr="" if not whrClause else " WHERE {}".format(whrClause),
        spFunc="ST_Buffer(" if not dissolve else \
            "ST_UnaryUnion(ST_Collect(ST_Buffer(",
        endFunc=")" if not dissolve else ")))",
        grpBy="" if not dissolve or dissolve == "ALL" else " GROUP BY {}".format(
            ", ".join(dissolve)
        )
    )
    
    if outTblIsFile:
        from gasp.gt.attr import sel_by_attr
        
        sel_by_attr(db, sql, outTbl, api_gis='ogr')
    
    else:
        from gasp.sql.to import q_to_ntbl
        
        q_to_ntbl(db, outTbl, sql, api='ogr2ogr')
    
    return outTbl
def split_table_entity_number(db, table, entity_field, entity_number):
    """
    Split a table into several tables using a number of entities per table
    as reference.
    
    If a table has 1 000 000 entities and entity_number is 250 000, this
    method will create four tables, each one with 250 000 entities.
    
    250 000 entities, not rows. Don't forget that the main table may have
    more than one row referencing the same entity.
    """
    
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i  import cols_type
    from gasp.sql.to import q_to_ntbl
    
    # Select entities in table
    entities = q_to_obj(db, "SELECT {c} FROM {t} GROUP BY {c}".format(
        c=entity_field, t=table
    ), db_api='psql')
    
    # Split entities into groups according to entity_number
    entityGroup = []
    
    lower = 0
    high  = entity_number
    while lower <= len(entities.index):
        if high > len(entities.index):
            high = len(entities.index)
        
        entityGroup.append(entities.iloc[lower:high])
        
        lower += entity_number
        high  += entity_number
    
    # For each dataframe, create a new table
    COLS_TYPE = cols_type(db, table)
    
    c = 0
    for df in entityGroup:
        if COLS_TYPE[entity_field] != str:
            df[entity_field] = '{}='.format(entity_field) + \
                df[entity_field].astype(str)
        else:
            df[entity_field] = '{}=\''.format(entity_field) + \
                df[entity_field].astype(str) + '\''
        
        whr = ' OR '.join(df[entity_field])
        
        q_to_ntbl(db, '{}_{}'.format(table, str(c)), (
            "SELECT * FROM {} WHERE {}"
        ).format(table, whr), api='psql')
        
        c += 1
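# Example (a sketch with hypothetical names; assumes a DB 'gisdb' with a GPS
# track table 'tracks' where 'vehicle_id' identifies the entity):
#
#     split_table_entity_number('gisdb', 'tracks', 'vehicle_id', 500)
#
# With 1 200 distinct vehicles this creates 'tracks_0', 'tracks_1' and
# 'tracks_2'; each output keeps every row of the (at most) 500 vehicles
# assigned to that group, so row counts per output table may differ.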
def txt_cols_to_col(db, inTable, columns, strSep, newCol, outTable=None):
    """
    Merge several text columns into a single column
    """
    
    from gasp.pyt    import obj_to_lst
    from gasp.sql.i  import cols_type
    from gasp.sql.to import q_to_ntbl
    
    mergeCols = obj_to_lst(columns)
    
    tblCols = cols_type(db, inTable, sanitizeColName=None, pyType=False)
    
    for col in mergeCols:
        if tblCols[col] != 'text' and tblCols[col] != 'varchar':
            raise ValueError('{} should be of type text'.format(col))
    
    coalesce = ""
    for i in range(len(mergeCols)):
        if not i:
            coalesce += "COALESCE({}, '')".format(mergeCols[i])
        
        else:
            coalesce += " || '{}' || COALESCE({}, '')".format(
                strSep, mergeCols[i])
    
    if outTable:
        # Write new table
        colsToSelect = [_c for _c in tblCols if _c not in mergeCols]
        
        if not colsToSelect:
            sel = coalesce + " AS {}".format(newCol)
        
        else:
            sel = "{}, {}".format(
                ", ".join(colsToSelect),
                coalesce + " AS {}".format(newCol)
            )
        
        q_to_ntbl(
            db, outTable, "SELECT {} FROM {}".format(sel, inTable),
            api='psql')
        
        return outTable
    
    else:
        # Add column to inTable
        from gasp.sql.tbl import update_table
        
        # NOTE: add_field is assumed to be available in this module's scope
        add_field(db, inTable, {newCol: 'text'})
        update_table(db, inTable, {newCol: coalesce})
        
        return inTable
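# Example (a sketch with hypothetical names; assumes a DB 'gisdb' with a
# table 'addresses' holding text columns 'street' and 'city'):
#
#     txt_cols_to_col('gisdb', 'addresses', ['street', 'city'], ', ',
#                     'full_address', outTable='addresses_full')
#
# builds the expression
#
#     COALESCE(street, '') || ', ' || COALESCE(city, '') AS full_address
#
# so NULL values never propagate into the merged string.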
def trim_char_in_col(db, pgtable, cols, trim_str, outTable,
                     onlyTrailing=None, onlyLeading=None):
    """
    Python implementation of the TRIM PSQL Function
    
    The PostgreSQL trim function is used to remove spaces or a set of
    characters from the leading, trailing or both sides of a string.
    """
    
    from gasp.pyt    import obj_to_lst
    from gasp.sql.i  import cols_type
    from gasp.sql.to import q_to_ntbl
    
    cols = obj_to_lst(cols)
    
    colsTypes = cols_type(db, pgtable, sanitizeColName=None, pyType=False)
    
    for col in cols:
        if colsTypes[col] != 'text' and colsTypes[col] != 'varchar':
            raise ValueError('{} should be of type text'.format(col))
    
    colsToSelect = [_c for _c in colsTypes if _c not in cols]
    
    tail_lead_str = "" if not onlyTrailing and not onlyLeading else \
        "TRAILING " if onlyTrailing and not onlyLeading else \
        "LEADING " if not onlyTrailing and onlyLeading else ""
    
    trimCols = [
        "TRIM({tol}{char} FROM {c}) AS {c}".format(
            c=col, tol=tail_lead_str, char=trim_str
        ) for col in cols
    ]
    
    if not colsToSelect:
        cols_to_select = "{}".format(", ".join(trimCols))
    
    else:
        cols_to_select = "{}, {}".format(
            ", ".join(colsToSelect), ", ".join(trimCols))
    
    q_to_ntbl(
        db, outTable,
        "SELECT {} FROM {}".format(cols_to_select, pgtable), api='psql'
    )
    
    return outTable
def pnts_to_lines(db, inTable, outTable, entityCol, orderCol,
                  geomCol=None, xCol=None, yCol=None, epsg=4326):
    """
    Given a table with points by entity, create a new table with a polyline
    for each entity. The points are added to the polyline based on the
    sequence in one column.
    """
    
    if not geomCol:
        if not xCol or not yCol:
            raise ValueError(
                'If geomCol is not specified, xCol and yCol must replace it!')
    
    from gasp.sql.to import q_to_ntbl
    
    geomRef = geomCol if geomCol else "ST_MakePoint({}, {})".format(xCol, yCol)
    
    Q = ("SELECT {entCol}, ST_SetSRID(ST_MakeLine("
         "array_agg({pntCol} ORDER BY {orderF})), {srs}) "
         "FROM {tbl} GROUP BY {entCol}").format(
        entCol=entityCol, pntCol=geomRef, orderF=orderCol,
        srs=epsg, tbl=inTable
    )
    
    return q_to_ntbl(db, outTable, Q, api='psql')
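# Example (a sketch with hypothetical names; assumes a DB 'gisdb' with a GPS
# point table 'gps_pnt' holding 'trip_id', a sequence column 'pnt_order' and
# a PostGIS 'geom' column):
#
#     pnts_to_lines('gisdb', 'gps_pnt', 'gps_lnh', 'trip_id', 'pnt_order',
#                   geomCol='geom')
#
# produces one row per trip_id with
# ST_SetSRID(ST_MakeLine(array_agg(geom ORDER BY pnt_order)), 4326),
# i.e. a polyline connecting the points of each trip in sequence order.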
def geom_to_points(db, table, geomCol, outTable, selCols=None, newGeomCol=None): """ Convert a Polygon/Polyline Geometry to Points Equivalent to feature to point tool """ from gasp.pyt import obj_to_lst from gasp.sql.to import q_to_ntbl selCols = obj_to_lst(selCols) Q = ("SELECT {cols}(ST_DumpPoints({geom})).geom AS {newCol} " "FROM {tbl}").format( cols="" if not selCols else "{}, ".format(", ".join(selCols)), geom=geomCol, newCol="geom" if not newGeomCol else newGeomCol, tbl=table) return q_to_ntbl(db, outTable, Q, api='psql')
def select_main_geom_type(db, table, outbl, geomCol='geom'): """ Assuming a table with several geometry types, this method counts the rows for each geometry type and select the rows with a geometry type with more rows """ from gasp.sql.to import q_to_ntbl from gasp.sql.i import cols_name COLS = [x for x in cols_name( db, table, sanitizeSpecialWords=None ) if x != geomCol] Q = ( "SELECT {cols}, {geomcol} FROM (" "SELECT *, MAX(jtbl.geom_cont) OVER (PARTITION BY " "jtbl.tst) AS max_cnt FROM (" "SELECT {cols}, (ST_Dump({geomcol})).geom AS {geomcol}, " "ST_GeometryType((ST_Dump({geomcol})).geom) AS geom_type " "FROM {tbl}" ") AS foo INNER JOIN (" "SELECT ST_GeometryType((ST_Dump({geomcol})).geom) AS gt, " "COUNT(ST_GeometryType((ST_Dump({geomcol})).geom)) AS geom_cont, " "1 AS tst FROM {tbl} GROUP BY ST_GeometryType((ST_Dump({geomcol})).geom)" ") AS jtbl ON foo.geom_type = jtbl.gt" ") AS foo WHERE geom_cont = max_cnt" ).format( cols=", ".join(COLS), geomcol=geomCol, tbl=table ) return q_to_ntbl(db, outbl, Q, api='psql')
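# Example (a sketch with hypothetical names; assumes a DB 'gisdb' where the
# table 'imported_cad' mixes geometry types in its 'geom' column):
#
#     select_main_geom_type('gisdb', 'imported_cad', 'cad_majority')
#
# If the dumped geometries count 950 ST_LineString rows and 50 ST_Point rows,
# only the LineString rows are written to 'cad_majority'.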
def rows_notin_q(db, tblA, tblB, joinCols, newTable, cols_to_mantain=None,
                 tblAisQuery=None, tblBisQuery=None):
    """
    Get rows from tblA that are not present in tblB
    
    joinCols = {colTblA : colTblB}
    """
    
    from gasp.pyt    import obj_to_lst
    from gasp.sql.to import q_to_ntbl
    
    cols_to_mantain = obj_to_lst(cols_to_mantain)
    
    q = (
        "SELECT {cls} FROM {ta} LEFT JOIN {tb} ON "
        "{rel} WHERE {tblB}.{fldB} IS NULL"
    ).format(
        cls=", ".join(cols_to_mantain) if cols_to_mantain else \
            "{}.*".format(tblA),
        ta=tblA if not tblAisQuery else tblAisQuery,
        tb=tblB if not tblBisQuery else tblBisQuery,
        rel=" AND ".join(["{ta}.{ca} = {tb}.{cb}".format(
            ta=tblA, tb=tblB, ca=k, cb=joinCols[k]
        ) for k in joinCols]),
        tblB=tblB, fldB=list(joinCols.values())[0]
    )
    
    newTable = q_to_ntbl(db, newTable, q, api='psql')
    
    return newTable
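# Example (a sketch of the anti-join this builds, with hypothetical names;
# assumes tables 'customers' and 'orders' related by customer id):
#
#     rows_notin_q('gisdb', 'customers', 'orders', {'id': 'customer_id'},
#                  'customers_without_orders')
#
# runs a LEFT JOIN of customers to orders on customers.id = orders.customer_id
# and keeps the rows where orders.customer_id IS NULL, i.e. customers that
# never appear in orders.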
def matrix_od_mean_dist_by_group(MATRIX_OD, ORIGIN_COL, GROUP_ORIGIN_ID, GROUP_ORIGIN_NAME, GROUP_DESTINA_ID, GROUP_DESTINA_NAME, TIME_COL, epsg, db, RESULT_MATRIX): """ Calculate Mean GROUP distance from OD Matrix OD MATRIX EXAMPLE | origin_entity | origin_group | destina_entity | destina_group | distance | XXXX | XXXX | XXXX | XXX | XXX OUTPUT EXAMPLE | origin_group | destina_group | mean_distance | XXXX | XXXX | XXXX """ import os from gasp.pyt.oss import fprop from gasp.gql.to import shp_to_psql from gasp.sql.db import create_db from gasp.sql.to import q_to_ntbl from gasp.to import db_to_tbl db = create_db(fprop(MATRIX_OD, 'fn'), overwrite=True, api='psql') TABLE = shp_to_psql(db, MATRIX_OD, pgTable="tbl_{}".format(db), api="pandas", srsEpsgCode=epsg) OUT_TABLE = q_to_ntbl( db, fprop(RESULT_MATRIX, 'fn'), ("SELECT {groupOriginCod}, {groupOriginName}, {groupDestCod}, " "{groupDestName}, AVG(mean_time) AS mean_time FROM (" "SELECT {origin}, {groupOriginCod}, {groupOriginName}, " "{groupDestCod}, {groupDestName}, " "AVG({timeCol}) AS mean_time FROM {t} " "GROUP BY {origin}, {groupOriginCod}, {groupOriginName}, " "{groupDestCod}, {groupDestName}" ") AS foo " "GROUP BY {groupOriginCod}, {groupOriginName}, " "{groupDestCod}, {groupDestName} " "ORDER BY {groupOriginCod}, {groupDestCod}").format( groupOriginCod=GROUP_ORIGIN_ID, groupOriginName=GROUP_ORIGIN_NAME, groupDestCod=GROUP_DESTINA_ID, groupDestName=GROUP_DESTINA_NAME, origin=ORIGIN_COL, timeCol=TIME_COL, t=TABLE), api='psql') return db_to_tbl(db, "SELECT * FROM {}".format(OUT_TABLE), RESULT_MATRIX, sheetsNames="matrix", dbAPI='psql')
def st_dissolve(db, table, geomColumn, outTable, whrClause=None,
                diss_cols=None, outTblIsFile=None, api='sqlite'):
    """
    Dissolve a Polygon table
    """
    
    from gasp.pyt import obj_to_lst
    
    diss_cols = obj_to_lst(diss_cols) if diss_cols else None
    geomcol = "geometry" if api == 'sqlite' else 'geom'
    
    sql = (
        "SELECT{selCols} ST_UnaryUnion(ST_Collect({geom})) AS {gout} "
        "FROM {tbl}{whr}{grpBy}"
    ).format(
        selCols="" if not diss_cols else " {},".format(", ".join(diss_cols)),
        geom=geomColumn, tbl=table,
        whr="" if not whrClause else " WHERE {}".format(whrClause),
        grpBy="" if not diss_cols else " GROUP BY {}".format(
            ", ".join(diss_cols)
        ),
        gout=geomcol
    )
    
    if outTblIsFile:
        if api == 'sqlite':
            from gasp.gt.attr import sel_by_attr
            
            sel_by_attr(db, sql, outTable, api_gis='ogr')
        
        elif api == 'psql':
            from gasp.gt.toshp.db import dbtbl_to_shp
            
            # Export the dissolve query directly to file
            dbtbl_to_shp(
                db, sql, geomcol, outTable, api='pgsql2shp',
                tableIsQuery=True
            )
    
    else:
        from gasp.sql.to import q_to_ntbl
        
        q_to_ntbl(
            db, outTable, sql,
            api='ogr2ogr' if api == 'sqlite' else 'psql'
        )
    
    return outTable
def st_buffer(db, inTbl, bfDist, geomCol, outTbl, bufferField="geometry", whrClause=None, dissolve=None, cols_select=None, outTblIsFile=None): """ Using Buffer on PostGIS Data """ from gasp.pyt import obj_to_lst dissolve = obj_to_lst(dissolve) if dissolve != "ALL" else "ALL" SEL_COLS = "" if not cols_select else ", ".join(obj_to_lst(cols_select)) DISS_COLS = "" if not dissolve or dissolve == "ALL" else ", ".join( dissolve) GRP_BY = "" if not dissolve else "{}, {}".format(SEL_COLS, DISS_COLS) if \ SEL_COLS != "" and DISS_COLS != "" else SEL_COLS \ if SEL_COLS != "" else DISS_COLS if DISS_COLS != "" else "" Q = ( "SELECT{sel}{spFunc}{geom}, {_dist}{endFunc} AS {bf} " "FROM {t}{whr}{grpBy}" ).format( sel = " " if not cols_select else " {}, ".format(SEL_COLS), spFunc="ST_Buffer(" if not dissolve else \ "ST_UnaryUnion(ST_Collect(ST_Buffer(", geom=geomCol, _dist=bfDist, endFunc=")" if not dissolve else ")))", t=inTbl, grpBy=" GROUP BY {}".format(GRP_BY) if GRP_BY != "" else "", whr="" if not whrClause else " WHERE {}".format(whrClause), bf=bufferField ) if not outTblIsFile: from gasp.sql.to import q_to_ntbl outTbl = q_to_ntbl(db, outTbl, Q, api='psql') else: from gasp.gt.toshp.db import dbtbl_to_shp dbtbl_to_shp(db, Q, bufferField, outTbl, api='pgsql2shp', tableIsQuery=True) return outTbl
def replace_char_in_col(db, pgtable, cols, match_str, replace_str, outTable):
    """
    Replace char in all columns in cols for the value of replace_str
    
    Python implementation of the REPLACE PSQL Function
    """
    
    from gasp.pyt    import obj_to_lst
    from gasp.sql.i  import cols_type
    from gasp.sql.to import q_to_ntbl
    
    cols = obj_to_lst(cols)
    
    colsTypes = cols_type(db, pgtable, sanitizeColName=None, pyType=False)
    
    for col in cols:
        if colsTypes[col] != 'text' and colsTypes[col] != 'varchar':
            raise ValueError('{} should be of type text'.format(col))
    
    colsToSelect = [_c for _c in colsTypes if _c not in cols]
    
    colsReplace = [
        "REPLACE({c}, '{char}', '{nchar}') AS {c}".format(
            c=col, char=match_str, nchar=replace_str
        ) for col in cols
    ]
    
    if not colsToSelect:
        cols_to_select = "{}".format(", ".join(colsReplace))
    
    else:
        cols_to_select = "{}, {}".format(
            ", ".join(colsToSelect), ", ".join(colsReplace))
    
    q_to_ntbl(db, outTable, "SELECT {cols} FROM {tbl}".format(
        cols=cols_to_select, tbl=pgtable
    ), api='psql')
    
    return outTable
def tbls_to_tbl(db, lst_tables, outTable): """ Append all tables in lst_tables into the outTable """ from gasp.sql.to import q_to_ntbl sql = " UNION ALL ".join( ["SELECT * FROM {}".format(t) for t in lst_tables]) outTable = q_to_ntbl(db, outTable, sql, api='psql') return outTable
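# Example (a sketch with hypothetical names; assumes tables with identical
# schemas):
#
#     tbls_to_tbl('gisdb', ['roads_north', 'roads_south'], 'roads_all')
#
# simply materialises
# "SELECT * FROM roads_north UNION ALL SELECT * FROM roads_south", so the
# input tables must share the same columns in the same order.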
def split_table_by_range(db, table, row_number): """ Split tables in several """ from gasp.sql.i import cols_name, row_num from gasp.sql.to import q_to_ntbl rowsN = row_num(db, table, api='psql') nrTables = int(rowsN / float(row_number)) + 1 COLS = cols_name(db, table) offset = 0 for i in range(nrTables): q_to_ntbl( db, '{}_{}'.format(table, str(i)), "SELECT * FROM {} ORDER BY {} OFFSET {} LIMIT {} ;".format( table, ', '.join(COLS), str(offset), str(row_number) ), api='psql' ) offset += row_number
def col_to_timestamp(db, inTbl, dayCol, hourCol, minCol, secCol, newTimeCol,
                     outTbl, selColumns=None, whr=None):
    """
    Columns to timestamp column
    """
    
    from gasp.pyt    import obj_to_lst
    from gasp.sql.to import q_to_ntbl
    
    selCols = obj_to_lst(selColumns)
    
    sql = ("SELECT {C}, TO_TIMESTAMP("
           "COALESCE(CAST({day} AS text), '') || ' ' || "
           "COALESCE(CAST({hor} AS text), '') || ':' || "
           "COALESCE(CAST({min} AS text), '') || ':' || "
           "COALESCE(CAST({sec} AS text), ''), 'YYYY-MM-DD HH24:MI:SS'"
           ") AS {TC} FROM {T}{W}").format(
        C="*" if not selCols else ", ".join(selCols),
        day=dayCol, hor=hourCol, min=minCol, sec=secCol,
        TC=newTimeCol, T=inTbl,
        W="" if not whr else " WHERE {}".format(whr)
    )
    
    q_to_ntbl(db, outTbl, sql, api='psql')
    
    return outTbl
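# Example (a sketch with hypothetical names; assumes a table 'events' with
# columns 'day' (date), 'hour', 'minute' and 'second'):
#
#     col_to_timestamp('gisdb', 'events', 'day', 'hour', 'minute', 'second',
#                      'event_time', 'events_ts')
#
# casts each part to text, concatenates them as 'YYYY-MM-DD HH24:MI:SS' and
# converts the result with TO_TIMESTAMP into the new 'event_time' column.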
def lnh_to_polg(db, intbl, outtbl): """ Line to Polygons """ from gasp.sql.to import q_to_ntbl Q = ("SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS gid, " "(ST_Dump(ST_Polygonize(geom))).geom AS geom FROM (" "SELECT ST_Node(ST_Collect(geom)) AS geom FROM (" "SELECT (ST_Dump(geom)).geom FROM {}" ") AS foo" ") AS foo").format(intbl) return q_to_ntbl(db, outtbl, Q)
def sel_where_groupByIs(db, table, groupByCols, grpByOp, grpByVal, outTable,
                        filterWhere=None):
    """
    Select rows in table where the GROUP BY values of the groupByCols agree
    with the statement formed by grpByOp and grpByVal
    
    For the following parameters:
    table=tst_table, groupByCols=[day, hour], grpByOp=>, grpByVal=1
    
    This method will create a new table using a query such as:
    SELECT tst_table.* FROM tst_table INNER JOIN (
        SELECT day, hour, COUNT(day) AS cnt_day FROM tst_table
        GROUP BY day, hour
    ) AS foo ON tst_table.day = foo.day AND tst_table.hour = foo.hour
    WHERE foo.cnt_day > 1
    """
    
    from gasp.pyt    import obj_to_lst
    from gasp.sql.to import q_to_ntbl
    
    groupByCols = obj_to_lst(groupByCols)
    
    q = ("SELECT {t}.* FROM {t} INNER JOIN ("
         "SELECT {cls}, COUNT({col}) AS cnt_{col} "
         "FROM {t} GROUP BY {cls}"
         ") AS foo ON {jOn} "
         "WHERE foo.cnt_{col} {op} {val}{fwhr}").format(
        t=table, cls=", ".join(groupByCols), col=groupByCols[0],
        jOn=" AND ".join([
            "{t}.{c} = foo.{c}".format(t=table, c=x) for x in groupByCols
        ]),
        op=grpByOp, val=grpByVal,
        fwhr="" if not filterWhere else " AND ({})".format(filterWhere)
    )
    
    outTable = q_to_ntbl(db, outTable, q, api='psql')
    
    return outTable
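# Example (a sketch using the query pattern shown in the docstring, with
# hypothetical names; assumes a table 'gps_pnt' with 'day' and 'hour'
# columns):
#
#     sel_where_groupByIs('gisdb', 'gps_pnt', ['day', 'hour'], '>', 5,
#                         'busy_hours')
#
# keeps only the rows belonging to day/hour groups that occur more than five
# times in 'gps_pnt'.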
def sql_proj(dbname, tbl, otbl, oepsg, cols=None, geomCol=None, newGeom=None, whr=None, new_pk=None): """ Reproject geometric layer to another spatial reference system (srs) """ from gasp.pyt import obj_to_lst from gasp.sql.to import q_to_ntbl geomCol = 'geom' if not geomCol else geomCol newGeom = 'geom' if not newGeom else newGeom if not cols: from gasp.sql.i import cols_name cols = cols_name(dbname, tbl) cols.remove(geomCol) else: cols = obj_to_lst(cols) if geomCol in cols and geomCol == newGeom: cols.remove(geomCol) cols.append('{c} AS old_{c}'.format(c=geomCol)) Q = ("SELECT {}, ST_Transform({}, {}) AS {} " "FROM {}{}").format(", ".join(cols), geomCol, str(oepsg), newGeom, tbl, "" if not whr else " WHERE {}".format(whr)) otbl = q_to_ntbl(dbname, otbl, Q, api='psql') if new_pk: from gasp.sql.k import create_pk create_pk(dbname, otbl, new_pk) return otbl
def add_endpnt_to_tbl(db, inTable, outTable, idCol='gid', geomCol='geom', startCol="start_vertex", endCol="end_vertex"): """ Add start/end points columns to table """ from gasp.sql.to import q_to_ntbl from gasp.sql.i import cols_name return q_to_ntbl(db, outTable, ("SELECT {cols}, {stPnt}, {endPnt} FROM (" "SELECT *, lead({stPnt}) OVER (" "PARTITION BY {colId} ORDER BY pnt_idx) AS {endPnt} " "FROM (" "SELECT {cols}, pnt_idx, {stPnt}, " "CASE " "WHEN pnt_idx = 1 OR pnt_idx = MAX(pnt_idx) " "OVER (PARTITION BY {colId}) " "THEN 1 ELSE 0 END AS pnt_cat " "FROM (" "SELECT {cols}, " "(ST_DumpPoints({geomF})).path[1] AS pnt_idx, " "(ST_DumpPoints({geomF})).geom AS {stPnt} " "FROM {table}" ") AS foo" ") AS foo2 " "WHERE pnt_cat = 1" ") AS foo3 " "WHERE {endPnt} IS NOT NULL " "ORDER BY {colId}, pnt_idx").format(cols=", ".join( cols_name(db, inTable)), stPnt=startCol, endPnt=endCol, colId=idCol, geomF=geomCol, table=inTable), api='psql')
def xycols_to_geom(db, intbl, x_col, y_col, outtable, geom_field='geom', epsg=4326): """ X and Y Colums to PostGIS Geom Column """ from gasp.sql.to import q_to_ntbl return q_to_ntbl(db, outtable, ("SELECT *, ST_SetSRID(ST_MakePoint({}, {}), {}) AS {} " "FROM {}").format(x_col, y_col, str(epsg), geom_field, intbl), api='psql')
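# Example (a sketch with hypothetical names; assumes a table 'stops' with
# numeric 'lon' and 'lat' columns in WGS84):
#
#     xycols_to_geom('gisdb', 'stops', 'lon', 'lat', 'stops_geom')
#
# adds a PostGIS point column built with
# ST_SetSRID(ST_MakePoint(lon, lat), 4326) AS geom.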
def split_lines_on_pnt(db, inTbl, pntTbl, outTbl, idlnhPnt, lnhid):
    """
    Split lines on point locations
    """
    
    from gasp.sql.i  import cols_name
    from gasp.sql.to import q_to_ntbl
    
    # Get cols of lnhTbl
    cols = ", ".join([c for c in cols_name(
        db, inTbl, sanitizeSpecialWords=True, api='psql'
    ) if c != 'geom' and c != idlnhPnt])
    
    # Force MultiLineString to LineString
    sanQ = (
        "(SELECT {lid}, {cln}, (ST_Dump(geom)).geom AS geom "
        "FROM {t}) AS mtbl"
    ).format(lid=lnhid, cln=cols, t=inTbl)
    
    # Split Query
    Q = ("SELECT {lid}, {cln}, (ST_Dump(geom)).geom AS geom FROM ("
         "SELECT mtbl.{lid}, {cln}, "
         "CASE "
         "WHEN jtbl.{pid} IS NULL THEN mtbl.geom "
         "ELSE ST_Split(mtbl.geom, jtbl.geom) "
         "END AS geom "
         "FROM {lnh_tbl} LEFT JOIN ("
         "SELECT {pid}, ST_Collect(geom) AS geom "
         "FROM {pnt_tbl} "
         "GROUP BY {pid}"
         ") AS jtbl on mtbl.{lid} = jtbl.{pid}"
         ") AS foo").format(
        lid=lnhid, cln=cols, pid=idlnhPnt,
        lnh_tbl=sanQ, pnt_tbl=pntTbl
    )
    
    # Produce new table and return it
    return q_to_ntbl(db, outTbl, Q)
def dsn_data_collection_by_multibuffer(inBuffers, workspace, db, datasource, keywords=None): """ Extract Digital Social Network Data for each sub-buffer in buffer. A sub-buffer is a buffer with a radius equals to the main buffer radius /2 and with a central point at North, South, East, West, Northeast, Northwest, Southwest and Southeast of the main buffer central point. inBuffers = { "lisbon" : { 'x' : -89004.994779, # in meters 'y' : -102815.866054, # in meters 'radius' : 10000, 'epsg' : 3763 }, "london : { 'x' : -14210.551441, # in meters 'y' : 6711542.47559, # in meters 'radius' : 10000, 'epsg' : 3857 } } or inBuffers = { "lisbon" : { "path" : /path/to/file.shp, "epsg" : 3763 } } keywords = ['flood', 'accident', 'fire apartment', 'graffiti', 'homeless'] datasource = 'facebook' or datasource = 'flickr' TODO: Only works for Flickr and Facebook """ import os; from osgeo import ogr from gasp.pyt import obj_to_lst from gasp.sql.db import create_db from gasp.sql.to import q_to_ntbl from gasp.sql.to import df_to_db from gasp.gql.to import shp_to_psql from gasp.gt.toshp import df_to_shp from gasp.gt.toshp.db import dbtbl_to_shp from gasp.gt.prox.bf import get_sub_buffers, dic_buffer_array_to_shp if datasource == 'flickr': from gasp.sde.dsn.flickr import photos_location elif datasource == 'facebook': from gasp.sde.dsn.fb.places import places_by_query keywords = obj_to_lst(keywords) keywords = ["None"] if not keywords else keywords # Create Database to Store Data create_db(db, overwrite=True, api='psql') for city in inBuffers: # Get Smaller Buffers if "path" in inBuffers[city]: # Get X, Y and Radius from gasp.gt.prop.feat.bf import bf_prop __bfprop = bf_prop( inBuffers[city]["path"], inBuffers[city]["epsg"], isFile=True ) inBuffers[city]["x"] = __bfprop["X"] inBuffers[city]["y"] = __bfprop["Y"] inBuffers[city]["radius"] = __bfprop["R"] inBuffers[city]["list_buffer"] = [{ 'X' : inBuffers[city]["x"], 'Y' : inBuffers[city]["y"], 'RADIUS' : inBuffers[city]['radius'], 'cardeal' : 'major' }] + get_sub_buffers( inBuffers[city]["x"], inBuffers[city]["y"], inBuffers[city]["radius"] ) # Smaller Buffers to File multiBuffer = os.path.join(workspace, 'buffers_{}.shp'.format(city)) dic_buffer_array_to_shp( inBuffers[city]["list_buffer"], multiBuffer, inBuffers[city]['epsg'], fields={'cardeal' : ogr.OFTString} ) # Retrive data for each keyword and buffer # Record these elements in one dataframe c = None tblData = None for bf in inBuffers[city]["list_buffer"]: for k in keywords: if datasource == 'flickr': tmpData = photos_location( bf, inBuffers[city]["epsg"], keyword=k if k != 'None' else None, epsg_out=inBuffers[city]["epsg"], onlySearchAreaContained=False ) elif datasource == 'facebook': tmpData = places_by_query( bf, inBuffers[city]["epsg"], keyword=k if k != 'None' else None, epsgOut=inBuffers[city]["epsg"], onlySearchAreaContained=False ) if type(tmpData) == int: print("NoData finded for buffer '{}' and keyword '{}'".format( bf['cardeal'], k )) continue tmpData["keyword"] = k tmpData["buffer_or"] = bf["cardeal"] if not c: tblData = tmpData c = 1 else: tblData = tblData.append(tmpData, ignore_index=True) inBuffers[city]["data"] = tblData # Get data columns names cols = inBuffers[city]["data"].columns.values dataColumns = [ c for c in cols if c != 'geom' and c != 'keyword' \ and c != 'buffer_or' and c != 'geometry' ] # Send data to PostgreSQL if 'geometry' in cols: cgeom = 'geometry' else: cgeom = 'geom' inBuffers[city]["table"] = 'tbldata_{}'.format(city) df_to_db( db, inBuffers[city]["data"], 
inBuffers[city]["table"], api='psql', epsg=inBuffers[city]["epsg"], geomType='POINT', colGeom=cgeom ) # Send Buffers data to PostgreSQL inBuffers[city]["pg_buffer"] = shp_to_psql( db, multiBuffer, pgTable='buffers_{}'.format(city), api="shp2pgsql", srsEpsgCode=inBuffers[city]["epsg"] ) inBuffers[city]["filter_table"] = q_to_ntbl( db, "filter_{}".format(inBuffers[city]["table"]), ( "SELECT srcdata.*, " "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) " "AS intersect_buffer FROM (" "SELECT {cols}, keyword, geom, " "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer " "FROM {pgtable} " "GROUP BY {cols}, keyword, geom" ") AS srcdata, (" "SELECT cardeal, geom AS bfg FROM {bftable}" ") AS buffersg " "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS TRUE " "GROUP BY {cols}, keyword, geom, extracted_buffer" ).format( cols = ", ".join(dataColumns), pgtable = inBuffers[city]["table"], bftable = inBuffers[city]["pg_buffer"] ), api='psql' ) inBuffers[city]["outside_table"] = q_to_ntbl( db, "outside_{}".format(inBuffers[city]["table"]), ( "SELECT * FROM (" "SELECT srcdata.*, " "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) " "AS not_intersect_buffer FROM (" "SELECT {cols}, keyword, geom, " "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer " "FROM {pgtable} " "GROUP BY {cols}, keyword, geom" ") AS srcdata, (" "SELECT cardeal, geom AS bfg FROM {bftable}" ") AS buffersg " "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS NOT TRUE " "GROUP BY {cols}, keyword, geom, extracted_buffer" ") AS foo WHERE array_length(not_intersect_buffer, 1) = 9" ).format( cols = ", ".join(dataColumns), pgtable = inBuffers[city]["table"], bftable = inBuffers[city]["pg_buffer"] ), api='psql' ) # Union these two tables inBuffers[city]["table"] = q_to_ntbl(db, "data_{}".format(city), ( "SELECT * FROM {intbl} UNION ALL " "SELECT {cols}, keyword, geom, extracted_buffer, " "CASE WHEN array_length(not_intersect_buffer, 1) = 9 " "THEN '{array_symbol}' ELSE not_intersect_buffer END AS " "intersect_buffer FROM {outbl}" ).format( intbl = inBuffers[city]["filter_table"], outbl = inBuffers[city]["outside_table"], cols = ", ".join(dataColumns), array_symbol = '{' + '}' ), api='psql') """ Get Buffers table with info related: -> pnt_obtidos = nr pontos obtidos usando esse buffer -> pnt_obtidos_fora = nt pontos obtidos fora desse buffer, mas obtidos com ele -> pnt_intersect = nt pontos que se intersectam com o buffer -> pnt_intersect_non_obtain = nr pontos que se intersectam mas nao foram obtidos como buffer """ inBuffers[city]["pg_buffer"] = q_to_ntbl( db, "dt_{}".format(inBuffers[city]["pg_buffer"]), ( "SELECT main.*, get_obtidos.pnt_obtidos, " "obtidos_fora.pnt_obtidos_fora, intersecting.pnt_intersect, " "int_not_obtained.pnt_intersect_non_obtain " "FROM {bf_table} AS main " "LEFT JOIN (" "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos " "FROM {bf_table} AS bf " "INNER JOIN {dt_table} AS dt " "ON bf.cardeal = ANY(dt.extracted_buffer) " "GROUP BY gid, cardeal" ") AS get_obtidos ON main.gid = get_obtidos.gid " "LEFT JOIN (" "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos_fora " "FROM {bf_table} AS bf " "INNER JOIN {dt_table} AS dt " "ON bf.cardeal = ANY(dt.extracted_buffer) " "WHERE ST_Intersects(bf.geom, dt.geom) IS NOT TRUE " "GROUP BY gid, cardeal" ") AS obtidos_fora ON main.gid = obtidos_fora.gid " "LEFT JOIN (" "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect " "FROM {bf_table} AS bf " "INNER JOIN {dt_table} AS dt " "ON bf.cardeal = ANY(dt.intersect_buffer) " "GROUP BY gid, cardeal" ") AS intersecting 
ON main.gid = intersecting.gid " "LEFT JOIN (" "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect_non_obtain " "FROM {bf_table} AS bf " "INNER JOIN {dt_table} AS dt " "ON bf.cardeal = ANY(dt.intersect_buffer) " "WHERE NOT (bf.cardeal = ANY(dt.extracted_buffer)) " "GROUP BY gid, cardeal" ") AS int_not_obtained " "ON main.gid = int_not_obtained.gid " "ORDER BY main.gid" ).format( bf_table = inBuffers[city]["pg_buffer"], dt_table = inBuffers[city]["table"] ), api='psql' ) """ Get Points table with info related: -> nobtido = n vezes um ponto foi obtido -> obtido_e_intersect = n vezes um ponto foi obtido usando um buffer com o qual se intersecta -> obtido_sem_intersect = n vezes um ponto foi obtido usando um buffer com o qual nao se intersecta -> nintersect = n vezes que um ponto se intersecta com um buffer -> intersect_sem_obtido = n vezes que um ponto nao foi obtido apesar de se intersectar com o buffer """ inBuffers[city]["table"] = q_to_ntbl( db, "info_{}".format(city), ( "SELECT {cols}, dt.keyword, dt.geom, " "CAST(dt.extracted_buffer AS text) AS extracted_buffer, " "CAST(dt.intersect_buffer AS text) AS intersect_buffer, " "array_length(extracted_buffer, 1) AS nobtido, " "SUM(CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE " "THEN 1 ELSE 0 END) AS obtido_e_intersect, " "(array_length(extracted_buffer, 1) - SUM(" "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE " "THEN 1 ELSE 0 END)) AS obtido_sem_intersect, " "array_length(intersect_buffer, 1) AS nintersect, " "(array_length(intersect_buffer, 1) - SUM(" "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE " "THEN 1 ELSE 0 END)) AS intersect_sem_obtido " "FROM {dt_table} AS dt " "INNER JOIN {bf_table} AS bf " "ON bf.cardeal = ANY(dt.extracted_buffer) " "GROUP BY {cols}, dt.keyword, dt.geom, " "dt.extracted_buffer, dt.intersect_buffer" ).format( dt_table = inBuffers[city]["table"], bf_table = inBuffers[city]["pg_buffer"], cols = ", ".join(["dt.{}".format(x) for x in dataColumns]) ), api='psql' ) # Export Results dbtbl_to_shp( db, inBuffers[city]["table"], 'geom', os.path.join(workspace, "{}.shp".format(inBuffers[city]["table"])), api='psql', epsg=inBuffers[city]["epsg"] ) dbtbl_to_shp( db, inBuffers[city]["pg_buffer"], 'geom', os.path.join(workspace, "{}.shp".format(inBuffers[city]["pg_buffer"])), api='psql', epsg=inBuffers[city]["epsg"] ) return inBuffers
def dsnsearch_by_cell(GRID_PNT, EPSG, RADIUS, DATA_SOURCE, db, OUTPUT_TABLE): """ Search for data in DSN and other platforms by cell """ import time; from gasp.gt.fmshp import shp_to_obj from gasp.sql.db import create_db from gasp.sde.dsn.fb.places import places_by_query from gasp.g.prj import df_prj from gasp.pyt.df.to import merge_df from gasp.gt.toshp.db import dbtbl_to_shp from gasp.sql.to import q_to_ntbl from gasp.sql.to import df_to_db # Open GRID SHP GRID_DF = shp_to_obj(GRID_PNT) GRID_DF = df_prj(GRID_DF, 4326) if EPSG != 4326 else GRID_DF GRID_DF["lng"] = GRID_DF.geometry.x.astype(float) GRID_DF["lat"] = GRID_DF.geometry.y.astype(float) GRID_DF["grid_id"] = GRID_DF.index # GET DATA RESULTS = [] def get_data(row, datasrc): if datasrc == 'facebook': d = places_by_query( {'x' : row.lng, 'y' : row.lat, 'r' : RADIUS}, 4326, keyword=None, epsgOut=EPSG, _limit='100', onlySearchAreaContained=None ) else: raise ValueError('{} as datasource is not a valid value'.format(datasrc)) if type(d) == int: return d['grid_id'] = row.grid_id RESULTS.append(d) time.sleep(5) GRID_DF.apply(lambda x: get_data(x, DATA_SOURCE), axis=1) RT = merge_df(RESULTS) # Create DB create_db(db, overwrite=True, api='psql') # Send Data to PostgreSQL df_to_db( db, RT, "{}_data".format(DATA_SOURCE), EPSG, "POINT", colGeom='geometry' if 'geometry' in RT.columns.values else 'geom' ) COLS = [ x for x in RT.columns.values if x != "geometry" and \ x != 'geom' and x != "grid_id" ] + ["geom"] GRP_BY_TBL = q_to_ntbl(db, "{}_grpby".format(DATA_SOURCE), ( "SELECT {cols}, CAST(array_agg(grid_id) AS text) AS grid_id " "FROM {dtsrc}_data GROUP BY {cols}" ).format(cols=", ".join(COLS), dtsrc=DATA_SOURCE), api='psql') dbtbl_to_shp( db, GRP_BY_TBL, "geom", OUTPUT_TABLE, api="psql", epsg=EPSG ) return OUTPUT_TABLE
def multiCols_FK_to_singleCol(db, tbl_wPk, pkCol, tbl_multiFk, fkCols,
                              newTable, colsSel=None, whrCls=None):
    """
    For two tables as:
    
    Main table:
    PK | col_1 | col_2 | col_n
    1  |   0   |   0   |   0
    2  |   1   |   1   |   1
    3  |   0   |   2   |   2
    4  |   1   |   2   |   3
    
    Table with a foreign key with several columns:
    col_1 | col_2 | col_n
      0   |   0   |   0
      0   |   0   |   0
      0   |   2   |   2
      1   |   1   |   1
      1   |   2   |   3
      1   |   1   |   1
    
    Create a new table with a foreign key in a single column:
    col_1 | col_2 | col_n | FK
      0   |   0   |   0   | 1
      0   |   0   |   0   | 1
      0   |   2   |   2   | 3
      1   |   1   |   1   | 2
      1   |   2   |   3   | 4
      1   |   1   |   1   | 2
    
    In this example:
    pkCol = PK
    fkCols = {col_1 : col_1, col_2 : col_2, col_n : col_n}
    (keys are cols of tbl_wPk and values are cols of tbl_multiFk)
    """
    
    if type(fkCols) != dict:
        raise ValueError("fkCols parameter should be a dict")
    
    from gasp.pyt    import obj_to_lst
    from gasp.sql.to import q_to_ntbl
    
    colsSel = obj_to_lst(colsSel)
    
    q = (
        "SELECT {tpk}.{pk}, {cls} FROM {tfk} "
        "INNER JOIN {tpk} ON {tblRel}{whr}"
    ).format(
        tpk=tbl_wPk, pk=pkCol, tfk=tbl_multiFk,
        cls="{}.*".format(tbl_multiFk) if not colsSel else \
            ", ".join(["{}.{}".format(tbl_multiFk, c) for c in colsSel]),
        tblRel=" AND ".join([
            "{}.{} = {}.{}".format(
                tbl_multiFk, fkCols[k], tbl_wPk, k
            ) for k in fkCols
        ]),
        whr="" if not whrCls else " WHERE {}".format(whrCls)
    )
    
    outbl = q_to_ntbl(db, newTable, q, api='psql')
    
    return outbl
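# Example (a sketch of the join this builds, with hypothetical names; assumes
# a lookup table 'dim_location' with primary key 'locid' and a fact table
# 'observations' repeating the columns 'district' and 'parish'):
#
#     multiCols_FK_to_singleCol(
#         'gisdb', 'dim_location', 'locid', 'observations',
#         {'district': 'district', 'parish': 'parish'}, 'observations_fk'
#     )
#
# joins the two tables on both columns and writes the fact rows together with
# the single-column foreign key 'locid'.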
def select_using_excel_refs(db_name, excel_file, sheet_name, pgtable,
                            ref_fields, tableInRef, tableOutRef=None):
    """
    Split PGTABLE using references in excel table
    
    Create two tables:
    * One with similar rows - column combinations that are in the excel table;
    * One with the rows not in the excel table.
    
    TODO: Check if it works.
    """
    
    from gasp.fm     import tbl_to_obj
    from gasp.sql.i  import cols_type
    from gasp.sql.to import q_to_ntbl
    
    def to_and(row, cols, ctype):
        def get_equal(_type):
            return '{}=\'{}\'' if _type == str else '{}={}'
        
        row['AND_E'] = ' AND '.join(
            get_equal(ctype[col]).format(col, row[col]) for col in cols)
        
        row['AND_E'] = '(' + row['AND_E'] + ')'
        
        return row
    
    # Get excel data
    table = tbl_to_obj(excel_file, sheet=sheet_name)
    
    # Get reference fields type
    TYPE_COLS = cols_type(db_name, pgtable)
    
    # Build one AND expression per row
    table = table.apply(
        lambda x: to_and(x, ref_fields, TYPE_COLS), axis=1)
    
    whr_equal = ' OR '.join(table['AND_E'])
    
    q_to_ntbl(db_name, tableInRef, "SELECT * FROM {} WHERE {}".format(
        pgtable, whr_equal
    ), api='psql')
    
    if tableOutRef:
        COLS_RELATION = " AND ".join(["{ft}.{f} = {st}.{f}".format(
            ft=pgtable, f=col, st=tableInRef
        ) for col in TYPE_COLS])
        
        q_to_ntbl(db_name, tableOutRef, (
            "SELECT {ft}.* FROM {ft} LEFT JOIN {st} ON "
            "{rel} WHERE {st}.{c} IS NULL"
        ).format(
            ft=pgtable, st=tableInRef, rel=COLS_RELATION,
            c=list(TYPE_COLS.keys())[0]
        ), api='psql')
def del_rows_by_temporal_proximity(db, table, entity_fields, day_field, hour_field, hour_decimal, minute_field, second_field, time_tolerance, outresult, exclusionRows=None): """ Exclude rows from one pgtable within some temporal interval from the previous row. Table structure should be entity | day | hour | minute | seconds | hour_decimal 0 | 2018-01-02 | 5 | X | X | 5,10 0 | 2018-01-03 | 4 | X | X | 4,15 0 | 2018-01-02 | 5 | X | X | 5,12 0 | 2018-01-02 | 5 | X | X | 5,8 1 | 2018-01-02 | 4 | X | X | 4,10 1 | 2018-01-02 | 5 | X | X | 5,12 1 | 2018-01-02 | 4 | X | X | 4,20 1 | 2018-01-02 | 4 | X | X | 4,12 1 | 2018-01-02 | 4 | X | X | 4,6 """ from gasp.pyt import obj_to_lst from gasp.sql.to import q_to_ntbl entity_fields = obj_to_lst(entity_fields) if not entity_fields: raise ValueError("entity_fields value is not valid!") if exclusionRows: # Get Rows deleted in table sql = ( "SELECT *, ({hourDec} - previous_hour) AS deltatime FROM (" "SELECT *, {lag_entity}, " "LAG({hourDec}) OVER(PARTITION BY " "{entityCols}, {dayF} ORDER BY " "{entityCols}, {dayF}, {hourF}, {minutesF}, {secondsF}" ") AS previous_hour " "FROM {mtable} ORDER BY {entityCols}, {dayF}, " "{hourF}, {minutesF}, {secondsF}" ") AS w_previous_tbl " "WHERE previous_hour IS NOT NULL AND " "({hourDec} - previous_hour) < {tol} / 60.0" ).format( hourDec=hour_decimal, lag_entity=", ".join([ "LAG({cl}) OVER(PARTITION BY {ent}, {d} ORDER BY {ent}, {d}, {h}, {m}, {s}) AS prev_{cl}" .format(cl=c, ent=", ".join(entity_fields), d=day_field, h=hour_field, m=minute_field, s=second_field) for c in entity_fields ]), entityCols=", ".join(entity_fields), dayF=day_field, hourF=hour_field, minutesF=minute_field, secondsF=second_field, mtable=table, tol=str(time_tolerance)) q_to_ntbl(db, exclusionRows, sql, api='psql') # Get rows outside the given time tolerance sql = ( "SELECT *, ({hourDec} - previous_hour) AS deltatime FROM (" "SELECT *, {lag_entity}, " "LAG({hourDec}) OVER(PARTITION BY {entityCols}, {dayF} ORDER BY " "{entityCols}, {dayF}, {hourF}, {minutesF}, " "{secondsF}) AS previous_hour " "FROM {mtable} ORDER BY {entityCols}, {dayF}, {hourF}, " "{minutesF}, {secondsF}" ") AS w_previous_tbl " "WHERE ({hourDec} - previous_hour) IS NULL OR " "({hourDec} - previous_hour) > {tol} / 60.0" ).format( hourDec=hour_decimal, lag_entity=", ".join([ "LAG({cl}) OVER(PARTITION BY {ent}, {d} ORDER BY {ent}, {d}, {h}, {m}, {s}) AS prev_{cl}" .format(cl=c, ent=", ".join(entity_fields), d=day_field, h=hour_field, m=minute_field, s=second_field) for c in entity_fields ]), entityCols=", ".join(entity_fields), dayF=day_field, hourF=hour_field, minutesF=minute_field, secondsF=second_field, mtable=table, tol=str(time_tolerance)) q_to_ntbl(db, outresult, sql, api='psql') return outresult
def osm_to_relationaldb(osmData, inSchema, osmGeoTbl, osmCatTbl, osmRelTbl, outSQL=None, db_name=None): """ PostgreSQL - OSM Data to Relational Model TODO: Just work for one geom table at once E.g. osmData = '/home/jasp/flainar/osm_centro.xml' inSchema = { "TBL" : ['points', 'lines', 'multipolygons'], 'FID' : 'CAST(osm_id AS bigint)', "COLS" : [ 'name', "ST_X(wkb_geometry) AS longitude", "ST_Y(wkb_geometry) AS latitude", "wkb_geometry AS geom", "NULL AS featurecategoryid", "NULL AS flainarcategoryid", "NULL AS createdby", "NOW() AS createdon", "NULL AS updatedon", "NULL AS deletedon" ], "NOT_KEYS" : [ 'ogc_fid', 'osm_id', 'name', "wkb_geometry", 'healthcare2', 'other_tags' ] } osmGeoTbl = {"TBL" : 'position', "FID" : 'positionid'} osmCatTbl = { "TBL" : 'osmcategory', "FID" : "osmcategoryid", "KEY_COL" : "keycategory", "VAL_COL" : "value", "COLS" : [ "NULL AS createdby", "NOW() AS createdon", "NULL AS updatedon", "NULL AS deletedon" ] } osmRelTbl = { "TBL" : "position_osmcat", "FID" : 'pososmcatid' } """ from gasp.pyt import obj_to_lst from gasp.pyt.oss import fprop from gasp.sql.i import cols_name from gasp.sql.to import q_to_ntbl from gasp.sql.db import create_db inSchema["TBL"] = obj_to_lst(inSchema["TBL"]) # Create DB db = create_db(fprop(osmData, 'fn') if not db_name else db_name, api='psql') # Send OSM data to Database osm_to_psql(osmData, db) # Get KEYS COLUMNS transcols = {} for tbl in inSchema["TBL"]: transcols[tbl] = [ c for c in cols_name(db, tbl, sanitizeSpecialWords=None) if c not in inSchema["NOT_KEYS"] ] # Create osmGeoTbl osmgeotbl = [ q_to_ntbl(db, osmGeoTbl[tbl]['TBL'], ("SELECT {} AS {}, {} FROM {}").format( inSchema["FID"], osmGeoTbl[tbl]["FID"], ", ".join(inSchema["COLS"]), tbl), api='psql') for tbl in inSchema["TBL"] ] # Create OSM categories table qs = [] for tbl in inSchema["TBL"]: qs.extend([ ("SELECT '{keyV}' AS {keyC}, CAST({t}.{keyV} AS text) AS {valC} " "FROM {t} WHERE {t}.{keyV} IS NOT NULL " "GROUP BY {t}.{keyV}").format(keyV=c, t=tbl, keyC=osmCatTbl["KEY_COL"], valC=osmCatTbl["VAL_COL"]) for c in transcols[tbl] ]) osmcatbl = q_to_ntbl( db, osmCatTbl["TBL"], ("SELECT row_number() OVER(ORDER BY {keyC}) " "AS {osmcatid}, {keyC}, {valC}{ocols} " "FROM ({q}) AS foo").format( q="SELECT {k}, {v} FROM ({t}) AS kvtbl GROUP BY {k}, {v}".format( k=osmCatTbl["KEY_COL"], v=osmCatTbl["VAL_COL"], t=" UNION ALL ".join(qs), ) if len(inSchema["TBL"]) > 1 else " UNION ALL ".join(qs), keyC=osmCatTbl["KEY_COL"], osmcatid=osmCatTbl["FID"], valC=osmCatTbl["VAL_COL"], ocols="" if "COLS" not in osmCatTbl else ", {}".format(", ".join( osmCatTbl["COLS"]))), api='psql') # Create relation table osmreltbl = [] for tbl in inSchema["TBL"]: qs = [( "SELECT {fid}, '{keyV}' AS key, CAST({t}.{keyV} AS text) AS osmval " "FROM {t} WHERE {t}.{keyV} IS NOT NULL").format( fid=inSchema["FID"], keyV=c, t=tbl) for c in transcols[tbl]] osmreltbl.append( q_to_ntbl( db, osmRelTbl[tbl]["TBL"], ("SELECT foo.{fid} AS {nfid}, catbl.{osmcatfid} " "FROM ({mtbl}) AS foo INNER JOIN {catTbl} AS catbl " "ON foo.key = catbl.{catkey} AND foo.osmval = catbl.{catval}" ).format(mtbl=" UNION ALL ".join(qs), fid=inSchema["FID"], nfid=osmRelTbl[tbl]["FID"], catTbl=osmCatTbl["TBL"], osmcatfid=osmCatTbl["FID"], catkey=osmCatTbl["KEY_COL"], catval=osmCatTbl["VAL_COL"]), api='psql')) if not outSQL: return osmgeotbl, osmcatbl, osmreltbl else: from gasp.sql.fm import dump_tbls return dump_tbls(db, osmgeotbl + [osmcatbl] + osmreltbl, outSQL)
def st_near(db, inTbl, inGeom, nearTbl, nearGeom, output,
            near_col='near', api='psql', whrNear=None, outIsFile=None,
            until_dist=None, cols_in_tbl=None, intbl_pk=None,
            cols_near_tbl=None):
    """
    Near tool for PostGIS and Spatialite
    
    api options:
    * psql
    * splite or spatialite
    """
    
    if api == 'psql' and not intbl_pk:
        from gasp.sql.to import q_to_ntbl
        
        _out = q_to_ntbl(db, output, (
            "SELECT m.*, ST_Distance(m.{ingeom}, j.geom) AS {distCol} "
            "FROM {t} AS m, ("
            "SELECT ST_UnaryUnion(ST_Collect({neargeom})) AS geom "
            "FROM {tblNear}{nearwhr}"
            ") AS j"
        ).format(
            ingeom=inGeom, distCol=near_col, t=inTbl,
            neargeom=nearGeom, tblNear=nearTbl,
            nearwhr="" if not whrNear else " WHERE {}".format(whrNear)
        ), api='psql')
        
        return output
    
    elif api == 'psql' and intbl_pk:
        from gasp.pyt    import obj_to_lst
        from gasp.sql.to import q_to_ntbl
        
        _out = q_to_ntbl(db, output, (
            "SELECT DISTINCT ON (s.{col_pk}) "
            "{inTblCols}, {nearTblCols}"
            "ST_Distance("
            "s.{ingeomCol}, h.{negeomCol}"
            ") AS {nearCol} FROM {in_tbl} AS s "
            "LEFT JOIN {near_tbl} AS h "
            "ON ST_DWithin(s.{ingeomCol}, h.{negeomCol}, {dist_v}) "
            "ORDER BY s.{col_pk}, ST_Distance(s.{ingeomCol}, h.{negeomCol})"
        ).format(
            col_pk=intbl_pk,
            inTblCols="s.*" if not cols_in_tbl else ", ".join(
                ["s.{}".format(x) for x in obj_to_lst(cols_in_tbl)]),
            nearTblCols="" if not cols_near_tbl else ", ".join(
                ["h.{}".format(x) for x in obj_to_lst(cols_near_tbl)]
            ) + ", ",
            ingeomCol=inGeom, negeomCol=nearGeom,
            nearCol=near_col, in_tbl=inTbl, near_tbl=nearTbl,
            dist_v="100000" if not until_dist else until_dist
        ), api='psql')
        
        return output
    
    elif api == 'splite' or api == 'spatialite':
        Q = (
            "SELECT m.*, ST_Distance(m.{ingeom}, j.geom) AS {distCol} "
            "FROM {t} AS m, ("
            "SELECT ST_UnaryUnion(ST_Collect({neargeom})) AS geom "
            "FROM {tblNear}{nearwhr}"
            ") AS j"
        ).format(
            ingeom=inGeom, distCol=near_col, t=inTbl,
            neargeom=nearGeom, tblNear=nearTbl,
            nearwhr="" if not whrNear else " WHERE {}".format(whrNear)
        )
        
        if outIsFile:
            from gasp.gt.attr import sel_by_attr
            
            sel_by_attr(db, Q, output, api_gis='ogr')
        
        else:
            from gasp.sql.to import q_to_ntbl
            
            q_to_ntbl(db, output, Q, api='ogr2ogr')
        
        return output
    
    else:
        raise ValueError("api {} does not exist!".format(api))
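# Example (a sketch with hypothetical names; assumes a point table 'schools'
# and a river table 'rivers', both with PostGIS 'geom' columns):
#
#     st_near('gisdb', 'schools', 'geom', 'rivers', 'geom', 'schools_near',
#             near_col='dist_river', api='psql')
#
# Without intbl_pk, every river geometry is collected/unioned and the distance
# from each school to that single geometry is returned; with intbl_pk (and
# optionally until_dist), an ST_DWithin + DISTINCT ON nearest-neighbour join
# is used instead, keeping the closest river feature per school.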