def tbl_fromdb_todb(from_db, to_db, tables, qForTbl=None, api='pandas'):
    """
    Send PGSQL Tables from one database to other.
    
    tables may be a single name or a list; qForTbl optionally maps a table
    name to a custom query used to fetch its data (api='pandas' only).
    api: 'pandas' (DataFrame round-trip) or 'psql' (pg_dump/pg_restore).
    """
    
    from glass.pys import obj_to_lst
    from glass.g.wt.sql import df_to_db
    
    # Any unknown api value falls back to 'pandas'
    api = 'pandas' if api != 'pandas' and api != 'psql' else api
    
    tables = obj_to_lst(tables)
    
    if api == 'pandas':
        from glass.ng.sql.q import q_to_obj
        
        for table in tables:
            # Use the table-specific query when one is given;
            # otherwise dump the entire table
            if qForTbl and table in qForTbl:
                q = qForTbl[table]
            else:
                q = "SELECT * FROM {}".format(table)
            
            tblDf = q_to_obj(from_db, q, db_api='psql')
            
            df_to_db(to_db, tblDf, table, api='psql')
    
    else:
        import os
        from glass.pys.oss import mkdir, del_folder
        from glass.ng.sql.bkup import dump_tbls
        from glass.ng.sql.db import restore_tbls
        
        # Work inside a temporary random-named folder next to this module
        tmpFolder = mkdir(
            os.path.dirname(os.path.abspath(__file__)), randName=True
        )
        
        # Dump
        sqlScript = dump_tbls(from_db, tables, os.path.join(
            tmpFolder, "tables_data.sql"
        ))
        
        # Restore
        restore_tbls(to_db, sqlScript, tables)
        
        del_folder(tmpFolder)
def get_osm_feat_by_rule(nomenclature):
    """
    Return OSM Features By rule.
    
    Joins the OSM features table with the nomenclature's relation table
    and the rules table (module-level DB_SCHEMA describes all names) and
    returns the result of querying PROCEDURE_DB.
    """
    
    from glass.ng.sql.q import q_to_obj
    
    # Resolve the schema sections once for readability
    feat_meta = DB_SCHEMA["OSM_FEATURES"]
    nom_meta  = DB_SCHEMA[nomenclature]
    rule_meta = DB_SCHEMA["RULES"]
    
    Q = (
        "SELECT jtbl.{rellulccls} AS {rellulccls}, "
        "{osmfeat}.{key} AS {key}, {osmfeat}.{val} AS {val}, "
        "jtbl.{ruleName} AS {ruleName}, jtbl.{bufferCol} "
        "AS {bufferCol}, jtbl.{areaCol} AS {areaCol} "
        "FROM {osmfeat} INNER JOIN ("
        "SELECT {osmrel}.{relosmid}, {osmrel}.{rellulccls}, "
        "{rules}.{ruleID}, {rules}.{ruleName}, "
        "{osmrel}.{bufferCol}, {osmrel}.{areaCol} "
        "FROM {osmrel} "
        "INNER JOIN {rules} ON {osmrel}.{_ruleID} = {rules}.{ruleID} "
        ") AS jtbl ON {osmfeat}.{osmid} = jtbl.{relosmid}"
    ).format(
        osmfeat=feat_meta["NAME"], osmid=feat_meta["OSM_ID"],
        key=feat_meta["OSM_KEY"], val=feat_meta["OSM_VALUE"],
        osmrel=nom_meta["OSM_RELATION"], relosmid=nom_meta["OSM_FK"],
        rellulccls=nom_meta["CLS_FK"], _ruleID=nom_meta["RULE_FK"],
        rules=rule_meta["NAME"], ruleID=rule_meta["RULE_ID"],
        ruleName=rule_meta["RULE_NAME"],
        bufferCol=nom_meta["RULES_FIELDS"]["BUFFER"],
        areaCol=nom_meta["RULES_FIELDS"]["AREA"]
    )
    
    return q_to_obj(PROCEDURE_DB, Q, db_api='sqlite')
def bar_chart_by_query(db, query, xaxis_col, chart_data_fld_name, series, outxls):
    """
    Execute some query for each serie in series. Produce a bar chart
    with the data returned. Each serie will be a new bar in the chart.
    
    The query statement must have a placeholder for each serie, e.g.:
    "SELECT fld_name FROM t WHERE series_fld={}".format(serie in series)
    """
    
    from glass.ng.sql.q import q_to_obj
    from glass.ng.pd.joins import combine_dfs
    from glass.ng.xls.charts import barchart_in_xls_from_df
    
    serie_dfs = []
    for serie in series:
        # Run the query for this serie and rename the data column
        # after the serie so each serie becomes its own column
        serie_df = q_to_obj(db, query.format(serie), db_api='psql')
        serie_df.rename(columns={chart_data_fld_name: serie}, inplace=True)
        serie_dfs.append(serie_df)
    
    # Join all series on the x-axis column
    main_df = combine_dfs(serie_dfs[0], serie_dfs[1:], xaxis_col)
    
    return barchart_in_xls_from_df(main_df, outxls, series, xaxis_col)
def search_words(group=None):
    """
    Get Search Words from the bundled 'ecgiwords.db' SQLite database.
    
    When group is given (name or list of names), only words belonging
    to those groups are returned.
    """
    
    from glass.pys import obj_to_lst
    from glass.ng.sql.q import q_to_obj
    
    # Database ships next to this module
    db = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'ecgiwords.db'
    )
    
    group = obj_to_lst(group)
    
    if group:
        grp_whr = " OR ".join([
            "groups.design = '{}'".format(g) for g in group
        ])
        Q = (
            "SELECT words.fid, words.word FROM words "
            "INNER JOIN groups ON words.grpid = groups.fid "
            "WHERE {} "
            "ORDER BY words.fid"
        ).format(grp_whr)
    else:
        Q = "SELECT words.fid, words.word FROM words ORDER BY words.fid"
    
    return q_to_obj(db, Q, db_api='sqlite')
def split_table_by_col_distinct(db, tbl, col):
    """
    Create a new table for each distinct value in one column.
    
    Each new table is named '<tbl>_<value>' and holds the rows of tbl
    where col equals that value.
    """
    
    from glass.ng.sql.q import q_to_obj
    from glass.ng.prop.sql import cols_type
    from glass.ng.sql.q import q_to_ntbl
    
    fields_types = cols_type(db, tbl)
    
    # Get unique values
    VALUES = q_to_obj(
        db, "SELECT {col} FROM {t} GROUP BY {col}".format(col=col, t=tbl),
        db_api='psql'
    )[col].tolist()
    
    # Text columns need the value quoted in the WHERE clause
    whr = '{}=\'{}\'' if fields_types[col] == str else '{}={}'
    
    for val in VALUES:
        # NOTE: tolist() yields scalar cell values; the previous code
        # indexed them with [0], truncating strings to their first
        # character and raising TypeError for numeric columns.
        q_to_ntbl(db, '{}_{}'.format(tbl, str(val)), (
            "SELECT * FROM {} WHERE {}"
        ).format(tbl, whr.format(col, str(val))), api='psql')
def db_to_db(db_a, db_b, typeDBA, typeDBB):
    """
    All tables in one Database to other database.
    
    Useful when we want to migrate a SQLITE DB to a PostgreSQL DB.
    
    typesDB options:
    * sqlite
    * psql
    """
    
    from glass.g.wt.sql import df_to_db
    from glass.ng.sql.q import q_to_obj
    from glass.ng.prop.sql import lst_tbl
    from glass.ng.sql.db import create_db
    
    # List Tables in DB A
    tbls = lst_tbl(db_a, excludeViews=True, api=typeDBA)
    
    # Create database B
    db_b = create_db(db_b, overwrite=False, api=typeDBB)
    
    # Copy every table to Database B
    for tbl in tbls:
        tbl_df = q_to_obj(
            db_a, "SELECT * FROM {}".format(tbl), db_api=typeDBA
        )
        
        df_to_db(db_b, tbl_df, tbl, append=None, api=typeDBB)
def thereIsRowsSameTimeInt(row):
    """
    Flag whether row's entity/day/hour group has at least two records
    whose decimal-hour values differ by less than TIME_TOLERANCE.
    
    Relies on names from the enclosing scope: COLS, COLS_TYPE, db,
    table, hour_decimal_field, TIME_TOLERANCE and q_to_obj.
    
    Sets row['time_difference'] to 1 when such a pair exists, else 0,
    and returns the row (intended for DataFrame.apply).
    """
    
    # Build the WHERE clause that selects this row's group
    whr = []
    for c in COLS:
        if COLS_TYPE[c] == str:
            whr.append("{}='{}'".format(c, row[c]))
        else:
            whr.append("{}={}".format(c, row[c]))
    
    hourRows = q_to_obj(db, "SELECT {} FROM {} WHERE {}".format(
        hour_decimal_field, table, " AND ".join(whr)
    ), db_api='psql')[hour_decimal_field].tolist()
    
    # tolist() yields scalar values, so compare entries directly (the
    # previous code indexed hourRows[i][0], which fails on numeric
    # scalars). Initialising the flag first also avoids referencing an
    # unbound 'dif' when fewer than two rows are returned.
    row['time_difference'] = 0
    for i in range(len(hourRows)):
        for e in range(i + 1, len(hourRows)):
            if abs(hourRows[i] - hourRows[e]) < TIME_TOLERANCE:
                row['time_difference'] = 1
                break
        if row['time_difference']:
            break
    
    return row
def rst_area(db, polygonTable, UPPER=True, api='SQLITE'):
    """
    Select features with area upper than.
    
    A field with threshold is needed in the database.
    
    Selects, per LULC class, the polygons whose area passes the
    per-feature threshold column (t_area_upper / t_area_lower), imports
    them into GRASS and rasterizes them.
    
    Returns a {class_code: raster_name} dict and a dict timing each step.
    """
    
    import datetime as dt
    from glass.ng.sql.q import q_to_obj
    from glass.g.it.shp import dbtbl_to_shp as db_to_grs
    from glass.g.dp.torst import grsshp_to_grsrst as shp_to_rst
    from glass.ete.osm2lulc import GEOM_AREA
    
    # Rule column and comparison operator depend on the UPPER flag
    RULE_COL = 'area_upper' if UPPER else 'area_lower'
    OPERATOR = " > " if UPPER else " < "
    
    # WHERE template used when importing each class into GRASS
    WHR = "{ga} {op} t_{r} AND {r}={cls_}"
    
    # Get Classes
    time_a = dt.datetime.now().replace(microsecond=0)
    lulcCls = q_to_obj(db, (
        "SELECT {r} FROM {tbl} WHERE {ga} {op} t_{r} GROUP BY {r}"
    ).format(
        r=RULE_COL, tbl=polygonTable, ga=GEOM_AREA, op=OPERATOR
    ), db_api='psql' if api == 'POSTGIS' else 'sqlite')[RULE_COL].tolist()
    time_b = dt.datetime.now().replace(microsecond=0)
    
    timeGasto = {0: ('check_cls', time_b - time_a)}
    
    # Import data into GRASS and convert it to raster
    clsRst = {}
    tk = 1
    for cls in lulcCls:
        time_x = dt.datetime.now().replace(microsecond=0)
        grsVect = db_to_grs(
            db, polygonTable, "geometry", "{}_{}".format(RULE_COL, cls),
            inDB="psql" if api == 'POSTGIS' else 'sqlite',
            where=WHR.format(
                op=OPERATOR, r=RULE_COL, ga=GEOM_AREA, cls_=cls
            ),
            notTable=True, filterByReg=True, outShpIsGRASS=True
        )
        time_y = dt.datetime.now().replace(microsecond=0)
        timeGasto[tk] = ('import_{}'.format(cls), time_y - time_x)
        
        # Burn the class code into a raster
        grsRst = shp_to_rst(grsVect, int(cls), f"rst_{RULE_COL}", cmd=True)
        time_z = dt.datetime.now().replace(microsecond=0)
        timeGasto[tk + 1] = ('torst_{}'.format(cls), time_z - time_y)
        
        clsRst[int(cls)] = grsRst
        tk += 2
    
    return clsRst, timeGasto
def split_table_entity_number(db, table, entity_field, entity_number):
    """
    Split tables in several using as reference a number of entities per table.
    
    If a table has 1 000 000 entities and entity_number is 250 000, this
    method will create four tables, each one with 250 000 entities.
    250 000 entities, not rows: the main table may have more than one
    reference to the same entity.
    """
    
    from glass.ng.sql.q import q_to_obj
    from glass.ng.prop.sql import cols_type
    from glass.ng.sql.q import q_to_ntbl
    
    # Select entities in table
    entities = q_to_obj(db, "SELECT {c} FROM {t} GROUP BY {c}".format(
        c=entity_field, t=table
    ), db_api='psql')
    
    # Split entities into groups according to entity_number.
    # Strict '<' avoids producing a trailing empty group - and an invalid
    # "... WHERE " query - when the entity count is an exact multiple of
    # entity_number (the old '<=' had that off-by-one).
    entityGroup = []
    lower, high = 0, entity_number
    while lower < len(entities.index):
        if high > len(entities.index):
            high = len(entities.index)
        
        # .copy() so the per-group column rewrite below does not operate
        # on a view of the original frame
        entityGroup.append(entities.iloc[lower:high].copy())
        
        lower += entity_number
        high += entity_number
    
    # For each dataframe, create a new table
    COLS_TYPE = cols_type(db, table)
    
    for c, df in enumerate(entityGroup):
        # Turn each entity value into a "field=value" predicate,
        # quoting values for text columns
        if COLS_TYPE[entity_field] != str:
            df[entity_field] = '{}='.format(
                entity_field) + df[entity_field].astype(str)
        else:
            df[entity_field] = '{}=\''.format(
                entity_field) + df[entity_field].astype(str) + '\''
        
        whr = ' OR '.join(df[entity_field])
        
        q_to_ntbl(db, '{}_{}'.format(table, str(c)), (
            "SELECT * FROM {} WHERE {}"
        ).format(table, whr), api='psql')
def grs_rst(db, polyTbl, api='SQLITE'):
    """
    Simple selection, convert result to Raster
    
    For each distinct non-NULL 'selection' class in polyTbl, imports the
    matching polygons into GRASS and rasterizes them with the class code.
    
    Returns a {class_code: raster_name} dict and a dict timing each step.
    """
    
    import datetime as dt
    from glass.ng.sql.q import q_to_obj
    from glass.g.it.shp import dbtbl_to_shp as db_to_grs
    from glass.g.dp.torst import grsshp_to_grsrst as shp_to_rst
    
    # Get Classes
    time_a = dt.datetime.now().replace(microsecond=0)
    lulcCls = q_to_obj(
        db,
        ("SELECT selection FROM {} "
         "WHERE selection IS NOT NULL "
         "GROUP BY selection").format(polyTbl),
        db_api='psql' if api == 'POSTGIS' else 'sqlite'
    ).selection.tolist()
    time_b = dt.datetime.now().replace(microsecond=0)
    
    timeGasto = {0: ('check_cls', time_b - time_a)}
    
    # Import data into GRASS and convert it to raster
    clsRst = {}
    tk = 1
    for cls in lulcCls:
        time_x = dt.datetime.now().replace(microsecond=0)
        grsVect = db_to_grs(
            db, polyTbl, "geometry", "rule1_{}".format(str(cls)),
            inDB='psql' if api == 'POSTGIS' else 'sqlite',
            where="selection = {}".format(str(cls)),
            notTable=True, filterByReg=True, outShpIsGRASS=True
        )
        time_y = dt.datetime.now().replace(microsecond=0)
        
        # Burn the class code into a raster
        grsRst = shp_to_rst(grsVect, int(cls), f"rst_rule1_{str(cls)}", cmd=True)
        time_z = dt.datetime.now().replace(microsecond=0)
        
        clsRst[int(cls)] = grsRst
        timeGasto[tk] = ('import_{}'.format(cls), time_y - time_x)
        timeGasto[tk + 1] = ('torst_{}'.format(cls), time_z - time_y)
        
        tk += 2
    
    return clsRst, timeGasto
def pgtbl_to_mdl(djg_proj, app, model, datadb, datatbl, geom=None, epsg=None):
    """
    Import data from one PostgreSQL Table into Django Model.
    
    djg_proj : path to the Django project (loaded via get_djgprj);
    app/model: dotted names used to resolve the target model class;
    geom/epsg: optional geometry column and its SRS code.
    
    Foreign-key columns are resolved by looking up the related model
    by primary key.
    """
    
    from glass.ng.sql.q import q_to_obj
    from glass.pys import __import
    from django.contrib.gis.geos import GEOSGeometry
    from django.contrib.gis.db import models
    from glass.webg.djg import get_djgprj
    
    # Get data
    data = q_to_obj(datadb, "SELECT * FROM {}".format(datatbl),
                    geomCol=geom, epsg=epsg)
    cols = data.columns.values
    
    # Get Django Application
    application = get_djgprj(djg_proj)
    
    # Get Model
    mdl_cls = __import('{}.models.{}'.format(app, model))
    
    def upmdl(row):
        # Create a fresh instance per row: the previous code reused one
        # instance, which acquired a primary key on the first save() and
        # made every subsequent save() UPDATE that same record instead
        # of INSERTing a new one.
        mdl_obj = mdl_cls()
        
        for col in cols:
            if geom and col == geom:
                # Add geometry
                setattr(mdl_obj, col, GEOSGeometry(row[col].wkt, srid=epsg))
            else:
                # Check if field is foreign key
                field_obj = mdl_cls._meta.get_field(col)
                
                if not isinstance(field_obj, models.ForeignKey):
                    setattr(mdl_obj, col, row[col])
                else:
                    # Resolve the related instance by primary key
                    related_name = field_obj.related_model.__name__
                    related_model = __import('{}.models.{}'.format(
                        app, related_name))
                    related_obj = related_model.objects.get(pk=int(row[col]))
                    setattr(mdl_obj, col, related_obj)
        
        mdl_obj.save()
    
    data.apply(lambda x: upmdl(x), axis=1)
def tbl_geomtype(db, table, geomCol='geom'):
    """
    Return the number of geometry types in table.
    """
    
    from glass.ng.sql.q import q_to_obj
    
    # Count how many distinct ST_GeometryType values exist after
    # dumping multi-geometries into single parts
    q = (
        "SELECT COUNT(*) AS row_count FROM ("
        "SELECT ST_GeometryType((ST_Dump({g})).geom) AS cnt_geom "
        "FROM {t} GROUP BY ST_GeometryType((ST_Dump({g})).geom)"
        ") AS foo"
    ).format(g=geomCol, t=table)
    
    res = q_to_obj(db, q, db_api='psql')
    
    return int(res.iloc[0].row_count)
def distinct_val(db, pgtable, column):
    """
    Get distinct values in one column of one pgtable.
    
    column may be a single name or a list of names; the result is a
    list of {column: value} records.
    """
    
    from glass.pys import obj_to_lst
    from glass.ng.sql.q import q_to_obj
    
    sel_cols = ", ".join(obj_to_lst(column))
    
    q = "SELECT {col} FROM {t} GROUP BY {col};".format(
        col=sel_cols, t=pgtable)
    
    return q_to_obj(db, q, db_api='psql').to_dict(orient="records")
def count_entity_periods_with_certain_duration(db, PERIOD_INTERVAL, PGTABLE, TIME_FIELD, ENTITY_FIELD, OUT_TABLE, filterWhere=None):
    """
    Count rows in a pgtable for a given period of X minutes for each
    interest entity.
    
    PERIOD_INTERVAL = "01:00:00"
    
    Produces one count column per interval (named s<start>_e<end>) and
    writes the combined result to OUT_TABLE.
    """
    
    from glass.pys.tm import day_to_intervals2
    from glass.ng.pd.joins import combine_dfs
    from glass.ng.wt import obj_to_tbl
    # q_to_obj was used below but never imported, raising NameError
    from glass.ng.sql.q import q_to_obj
    
    # Get Intervals
    INTERVALS = day_to_intervals2(PERIOD_INTERVAL)
    
    # For each interval/period, count the number of rows by entity
    counting = []
    for _int in INTERVALS:
        Q = (
            "SELECT {entityCol}, COUNT({entityCol}) AS {cntCol} "
            "FROM {table} WHERE "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') >= "
            "TO_TIMESTAMP('{minLower}', 'HH24:MI:SS') AND "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') < "
            "TO_TIMESTAMP('{minUpper}', 'HH24:MI:SS'){whr} "
            "GROUP BY {entityCol}"
        ).format(
            # Column name like s08_00_e09_00 (':' is not a valid
            # identifier character)
            cntCol="s{}_e{}".format(
                _int[0][:5], _int[1][:5]).replace(":", "_"),
            table=PGTABLE, timeCol=TIME_FIELD, entityCol=ENTITY_FIELD,
            minLower=_int[0], minUpper=_int[1],
            whr="" if not filterWhere else " AND ({}) ".format(filterWhere)
        )
        
        count = q_to_obj(db, Q, db_api='psql')
        
        counting.append(count)
    
    # One row per entity, one column per interval
    mainDf = combine_dfs(counting[0], counting[1:], ENTITY_FIELD)
    
    obj_to_tbl(mainDf, OUT_TABLE)
    
    return OUT_TABLE
def show_duplicates_in_xls(db_name, table, pkCols, outFile, tableIsQuery=None):
    """
    Find duplicates and write these objects in a table.
    
    Rows sharing the same values in pkCols are considered duplicates;
    table may be a table name or, when tableIsQuery is truthy, a query.
    """
    
    from glass.pys import obj_to_lst
    from glass.ng.sql.q import q_to_obj
    from glass.ng.wt import obj_to_tbl
    
    pkCols = obj_to_lst(pkCols)
    
    if not pkCols:
        raise ValueError("pkCols value is not valid")
    
    key_cols = ", ".join(pkCols)
    
    if not tableIsQuery:
        join_on = " AND ".join([
            "{t}.{c} = foo.{c}".format(t=table, c=col) for col in pkCols
        ])
        
        q = (
            "SELECT {t}.* FROM {t} INNER JOIN ("
            "SELECT {cls}, COUNT({cnt}) AS conta FROM {t} "
            "GROUP BY {cls}"
            ") AS foo ON {rel} "
            "WHERE conta > 1"
        ).format(t=table, cls=key_cols, cnt=pkCols[0], rel=join_on)
    else:
        join_on = " AND ".join([
            "foo.{c} = jt.{c}".format(c=x) for x in pkCols
        ])
        
        q = (
            "SELECT foo.* FROM ({q_}) AS foo INNER JOIN ("
            "SELECT {cls}, COUNT({cnt}) AS conta "
            "FROM ({q_}) AS foo2 GROUP BY {cls}"
            ") AS jt ON {rel} "
            "WHERE conta > 1"
        ).format(q_=table, cls=key_cols, cnt=pkCols[0], rel=join_on)
    
    dups = q_to_obj(db_name, q, db_api='psql')
    
    obj_to_tbl(dups, outFile)
    
    return outFile
def check_last_id(db, pk, table):
    """
    Check last ID of a given table.
    
    Return 0 if there is no data.
    """
    
    from glass.ng.sql.q import q_to_obj
    
    q = "SELECT MAX({}) AS fid FROM {}".format(pk, table)
    
    fid_vals = q_to_obj(db, q, db_api='psql').fid.tolist()
    
    # MAX() over an empty table yields NULL/None -> report 0
    return fid_vals[0] if fid_vals[0] else 0
def count_by_periods_with_certain_duration(db, PERIOD_INTERVAL, pgtable, TIME_FIELD, outTable, filterWhere=None):
    """
    Count rows in a pgtable by periods of X minutes.
    
    PERIOD_INTERVAL = "01:00:00"
    
    The output table has one row per interval, indexed by
    "<start>-<end>" labels.
    """
    
    import pandas
    from glass.pys.tm import day_to_intervals2
    # These were used below but never imported, raising NameError
    from glass.ng.sql.q import q_to_obj
    from glass.ng.wt import obj_to_tbl
    
    # Get Intervals
    INTERVALS = day_to_intervals2(PERIOD_INTERVAL)
    
    # For each interval/period, count the number of rows
    counting = None
    for _int_ in INTERVALS:
        QUERY = (
            "SELECT COUNT(*) AS count FROM {table} WHERE "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') >= "
            "TO_TIMESTAMP('{minLower}', 'HH24:MI:SS') AND "
            "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') < "
            "TO_TIMESTAMP('{minUpper}', 'HH24:MI:SS'){whr}"
        ).format(
            table=pgtable, timeCol=TIME_FIELD,
            minLower=_int_[0], minUpper=_int_[1],
            whr="" if not filterWhere else " AND ({})".format(filterWhere)
        )
        
        count = q_to_obj(db, QUERY, db_api='psql')
        
        # Label the single result row with the interval it refers to
        count.rename(index={
            0: "{}-{}".format(_int_[0][:5], _int_[1][:5])
        }, inplace=True)
        
        if not isinstance(counting, pandas.DataFrame):
            counting = count.copy()
        else:
            # concat keeps working on pandas >= 2.0, where
            # DataFrame.append was removed
            counting = pandas.concat([counting, count], ignore_index=False)
    
    obj_to_tbl(counting, outTable)
    
    return outTable
def tbl_ext(db, table, geomCol):
    """
    Return extent of the geometries in one pgtable.
    
    Output: [left, bottom, right, top]
    """
    
    from glass.ng.sql.q import q_to_obj
    
    # Dump every vertex and take the min/max of its X/Y coordinates
    q = (
        "SELECT MIN(ST_X(pnt_geom)) AS eleft, MAX(ST_X(pnt_geom)) AS eright, "
        "MIN(ST_Y(pnt_geom)) AS bottom, MAX(ST_Y(pnt_geom)) AS top "
        "FROM ("
        "SELECT (ST_DumpPoints({geomcol})).geom AS pnt_geom "
        "FROM {tbl}"
        ") AS foo"
    ).format(tbl=table, geomcol=geomCol)
    
    ext_df = q_to_obj(db, q, db_api='psql')
    ext = ext_df.to_dict(orient='index')[0]
    
    return [ext['eleft'], ext['bottom'], ext['eright'], ext['top']]
def cols_name(dbname, table, sanitizeSpecialWords=True, api='psql', dbset='default'):
    """
    Return the columns names of a table in one Database.
    
    api: 'psql', 'sqlite' or 'mysql'.
    sanitizeSpecialWords (psql only): quote names that clash with
    PostgreSQL reserved words.
    """
    
    if api == 'psql':
        c = sqlcon(dbname, sqlAPI='psql', dbset=dbset)
        
        cursor = c.cursor()
        cursor.execute("SELECT * FROM {} LIMIT 1;".format(table))
        colnames = [desc[0] for desc in cursor.description]
        
        # Release the connection (previously leaked)
        cursor.close()
        c.close()
        
        if sanitizeSpecialWords:
            from glass.cons.psql import PG_SPECIAL_WORDS
            
            # Reserved words must be double-quoted to be usable in SQL
            for i in range(len(colnames)):
                if colnames[i] in PG_SPECIAL_WORDS:
                    colnames[i] = '"{}"'.format(colnames[i])
    
    elif api == 'sqlite':
        import sqlite3
        
        con = sqlite3.connect(dbname)
        
        cursor = con.execute("SELECT * FROM {} LIMIT 1".format(table))
        colnames = list(map(lambda x: x[0], cursor.description))
        
        # Release the connection (previously leaked)
        con.close()
    
    elif api == 'mysql':
        from glass.ng.sql.q import q_to_obj
        
        data = q_to_obj(
            dbname, "SELECT * FROM {} LIMIT 1".format(table),
            db_api='mysql'
        )
        colnames = data.columns.values
    
    else:
        raise ValueError('API {} is not available'.format(api))
    
    return colnames
def get_stops_by_route(isCIRC):
    """
    Self-join the stops-by-route table so every stop of a route is
    paired with every other stop of the same route.
    
    isCIRC selects circular (=) or non-circular (<>) routes. Uses
    db, STOPS_BY_ROUTE_TABLE and q_to_obj from the enclosing scope.
    """
    
    circ_op = "=" if isCIRC else "<>"
    
    q = (
        "SELECT {t}.{routeF} AS fid_route, {t}.{stopF} AS stop_a, "
        "{t}.{orderF} AS order_a, "
        "aux.{stopF} AS stop_b, aux.{orderF} AS order_b "
        "FROM {t} INNER JOIN {t} AS aux "
        "ON {t}.{routeF} = aux.{routeF} "
        "WHERE {t}.{circfield}{operator}{circvalue} "
        "ORDER BY {t}.{routeF}, {t}.{orderF}, aux.{orderF}"
    ).format(
        routeF=STOPS_BY_ROUTE_TABLE["ID"],
        stopF=STOPS_BY_ROUTE_TABLE["STOP"],
        orderF=STOPS_BY_ROUTE_TABLE["ORDER"],
        t=STOPS_BY_ROUTE_TABLE["NAME"],
        circfield=STOPS_BY_ROUTE_TABLE["CIRC"],
        circvalue=STOPS_BY_ROUTE_TABLE["ISCIRC"],
        operator=circ_op
    )
    
    return q_to_obj(db, q)
def row_num(db, table, where=None, api='psql', dbset='default'):
    """
    Return the number of rows in Query.
    
    API's Available:
    * psql;
    * sqlite;
    """
    
    from glass.ng.sql.q import q_to_obj
    
    if table.startswith('SELECT '):
        # "table" is itself a query: count over it as a sub-query
        Q = "SELECT COUNT(*) AS nrows FROM ({}) AS foo".format(table)
    else:
        whr = "" if not where else " WHERE {}".format(where)
        Q = "SELECT COUNT(*) AS nrows FROM {}{}".format(table, whr)
    
    d = q_to_obj(db, Q, db_api=api, dbset=dbset)
    
    return int(d.iloc[0].nrows)
def lst_views(db, schema='public', basename=None, dbset='default'):
    """
    List Views in database.
    
    basename (name or list) optionally filters views whose name
    contains any of the given substrings.
    """
    
    from glass.pys import obj_to_lst
    from glass.ng.sql.q import q_to_obj
    
    basename = obj_to_lst(basename)
    
    if basename:
        name_flt = " OR ".join([
            "{} LIKE '%%{}%%'".format("table_name", b) for b in basename
        ])
        flt = " AND ({})".format(name_flt)
    else:
        flt = ""
    
    views = q_to_obj(db, (
        "SELECT table_name FROM information_schema.views "
        "WHERE table_schema='{}'{}"
    ).format(schema, flt), db_api='psql', dbset=dbset)
    
    return views.table_name.tolist()
def apndtbl_in_otherdb(db_a, db_b, tblA, tblB, mapCols, geomCol=None, srsEpsg=None, con_a=None, con_b=None):
    """
    Append data of one table to another table in other database.
    
    mapCols maps source column names (tblA) to target column names
    (tblB); geomCol/srsEpsg describe the optional geometry column.
    """
    
    from glass.ng.sql.q import q_to_obj
    from glass.g.wt.sql import df_to_db
    
    sel_cols = ", ".join(list(mapCols.keys()))
    
    df = q_to_obj(
        db_a, "SELECT {} FROM {}".format(sel_cols, tblA),
        db_api='psql', geomCol=geomCol, epsg=srsEpsg, dbset=con_a
    )
    
    # Change Names
    df.rename(columns=mapCols, inplace=True)
    
    # Track the geometry column under its new name
    if geomCol and geomCol in mapCols:
        geomCol = mapCols[geomCol]
    
    # Send data to other database
    if geomCol and srsEpsg:
        from glass.g.prop.feat import get_gtype
        
        gType = get_gtype(df, geomCol=geomCol, gisApi='pandas')
        
        df_to_db(
            db_b, df, tblB, append=True, api='psql', epsg=srsEpsg,
            geomType=gType, colGeom=geomCol, dbset=con_b
        )
    else:
        df_to_db(db_b, df, tblB, append=True, api='psql', dbset=con_b)
    
    return tblB
def ID_rows_with_temporal_proximity_by_entities(db, table, entity_field, day_field, hour_field, hour_decimal_field, time_tolerance, outXlsPath):
    """
    Retrieve rows from one pgtable with some temporal proximity.
    
    Table structure should be:
    entity | day        | hour | hour_decimal
    0      | 2018-01-02 |  5   | 5,10
    0      | 2018-01-03 |  4   | 4,15
    0      | 2018-01-02 |  5   | 5,12
    1      | 2018-01-02 |  4   | 4,10
    ...
    
    For a time_tolerance of 5 minutes, the output table will flag the
    groups with a temporal difference inside/bellow that time tolerance.
    
    entity_field could be more than one field.
    
    This method only identifies if one entity, for one day, has rows
    very close to each other in terms of time.
    
    Not a good strategy for large tables. For large tables, SQL based
    methods are needed.
    """
    
    from glass.pys import obj_to_lst
    from glass.ng.sql.q import q_to_obj
    from glass.ng.prop.sql import cols_type
    from glass.ng.wt import obj_to_tbl
    
    entity_field = obj_to_lst(entity_field)
    COLS = entity_field + [day_field, hour_field]
    COLS_TYPE = cols_type(db, table)
    
    # TIME TOLERANCE IN HOURS
    TIME_TOLERANCE = time_tolerance / 60.0
    
    def thereIsRowsSameTimeInt(row):
        # Build the WHERE clause that selects this row's
        # entity/day/hour group
        whr = []
        for c in COLS:
            if COLS_TYPE[c] == str:
                whr.append("{}='{}'".format(c, row[c]))
            else:
                whr.append("{}={}".format(c, row[c]))
        
        hourRows = q_to_obj(db, "SELECT {} FROM {} WHERE {}".format(
            hour_decimal_field, table, " AND ".join(whr)
        ), db_api='psql')[hour_decimal_field].tolist()
        
        # tolist() yields scalar values, so compare entries directly
        # (the previous code indexed hourRows[i][0], which raises
        # TypeError on numeric scalars). Initialising the flag first
        # also avoids an unbound variable when fewer than two rows
        # come back.
        row['time_difference'] = 0
        for i in range(len(hourRows)):
            for e in range(i + 1, len(hourRows)):
                if abs(hourRows[i] - hourRows[e]) < TIME_TOLERANCE:
                    row['time_difference'] = 1
                    break
            if row['time_difference']:
                break
        
        return row
    
    # Count entity occourrences for one day and hour
    countsByEntityTime = q_to_obj(db, (
        "SELECT {scols}, conta FROM "
        "(SELECT {scols}, COUNT({ent}) AS conta FROM {tbl} "
        "GROUP BY {scols}) AS foo WHERE conta > 1"
    ).format(
        scols=', '.join(COLS), ent=entity_field[0], tbl=table
    ), db_api='psql')
    
    # For each row in the last count, when count is > 1,
    # check time difference between rows for one day and hour
    countsByEntityTime = countsByEntityTime.apply(
        lambda x: thereIsRowsSameTimeInt(x), axis=1)
    
    obj_to_tbl(countsByEntityTime, outXlsPath)
    
    return outXlsPath
def num_selection(osmdb, polyTbl, folder, cellsize, srscode, rstTemplate, api='SQLITE'):
    """
    Select and Convert to Raster
    
    For each distinct 'selection' class in polyTbl, exports the matching
    polygons to a shapefile and rasterizes it (one thread per class).
    
    Returns a {class_code: raster_path} dict and a dict timing each step.
    """
    
    import datetime
    import os
    from threading import Thread
    
    # sel_by_attr implementation depends on the backend database
    if api == 'SQLITE':
        from glass.g.tbl.filter import sel_by_attr
    else:
        from glass.g.it.shp import dbtbl_to_shp as sel_by_attr
    from glass.ng.sql.q import q_to_obj
    from glass.g.dp.torst import shp_to_rst
    
    # Get classes in data
    time_a = datetime.datetime.now().replace(microsecond=0)
    classes = q_to_obj(
        osmdb,
        ("SELECT selection FROM {} "
         "WHERE selection IS NOT NULL "
         "GROUP BY selection").format(polyTbl),
        db_api='psql' if api == 'POSTGIS' else 'sqlite'
    ).selection.tolist()
    time_b = datetime.datetime.now().replace(microsecond=0)
    
    timeGasto = {0: ('check_cls', time_b - time_a)}
    
    # Select and Export
    # clsRst/timeGasto are shared with the worker threads below; each
    # worker writes distinct keys, so no locking is used
    clsRst = {}
    SQL_Q = "SELECT {lc} AS cls, geometry FROM {tbl} WHERE selection={lc}"
    
    def FilterAndExport(CLS, cnt):
        # Export polygons of class CLS to shapefile, then rasterize;
        # cnt is the base index for this class' timing entries
        time_x = datetime.datetime.now().replace(microsecond=0)
        
        if api == 'SQLITE':
            shp = sel_by_attr(
                osmdb, SQL_Q.format(lc=str(CLS), tbl=polyTbl),
                os.path.join(folder, 'sel_{}.shp'.format(str(CLS))),
                api_gis='ogr'
            )
        else:
            shp = sel_by_attr(
                osmdb, SQL_Q.format(lc=str(CLS), tbl=polyTbl),
                "geometry",
                os.path.join(folder, 'sel_{}.shp'.format(str(CLS))),
                api='pgsql2shp', tableIsQuery=True
            )
        time_y = datetime.datetime.now().replace(microsecond=0)
        
        rstCls = shp_to_rst(
            shp, None, cellsize, 0,
            os.path.join(folder, 'sel_{}.tif'.format(str(CLS))),
            epsg=srscode, rst_template=rstTemplate, api='gdal'
        )
        time_z = datetime.datetime.now().replace(microsecond=0)
        
        clsRst[int(CLS)] = rstCls
        timeGasto[cnt + 1] = ('toshp_{}'.format(str(CLS)), time_y - time_x)
        timeGasto[cnt + 2] = ('torst_{}'.format(str(CLS)), time_z - time_y)
    
    # One thread per class
    trs = []
    for i in range(len(classes)):
        trs.append(Thread(
            name="lll{}".format(str(classes[i])),
            target=FilterAndExport, args=(classes[i], (i + 1) * 10)
        ))
    
    for t in trs:
        t.start()
    for t in trs:
        t.join()
    
    return clsRst, timeGasto
def list_trips(db, db_schema, gtfsDay, pgDay, lowerTimeInt=None, upperTimeInt=None):
    """
    Return a DataFrame with all Trips in GTFS
    
    DB_SCHEMA example:
    DB_SCHEMA = {
        "ROUTES" : {
            "TNAME"  : "routes", # TABLE NAME
            "ID"     : "route_id", # FIELD NAME
            "SHORT"  : "route_short_name", # FIELD NAME
            "LONG"   : "route_long_name", # FIELD NAME
            "AGENCY" : "agency_id" # FIELD NAME
        },
        "STOP_TIMES" : {
            "TNAME"     : "stop_times",
            "TRIP"      : "trip_id",
            "STOP"      : "stop_id",
            "SEQUENCE"  : "stop_sequence",
            "DEPARTURE" : "departure_time"
        },
        "TRIPS" : {
            "TNAME"   : "trips",
            "TRIP"    : "trip_id",
            "ROUTE"   : "route_id",
            "SERVICE" : "service_id"
        },
        "AGENCY" : {
            "TNAME" : "agency",
            "ID"    : "agency_id",
            "NAME"  : "agency_name"
        },
        "CALENDAR" : {
            "TNAME"   : "calendar_dates",
            "SERVICE" : "service_id",
            "DATE"    : "date"
        }
    }
    
    dayToUse example:
    dayToUse = "20180222" # 22 February 2018
    
    Interval example:
    upperInterval = "10:00:00"
    lowerInterval = "08:00:00"
    """
    
    from glass.ng.sql.q import q_to_obj
    
    # Anchor the optional time window to the reference day so it can be
    # compared against the TO_TIMESTAMP() values built in the query
    if upperTimeInt and lowerTimeInt:
        upperTimeInt = "{} {}".format(pgDay, upperTimeInt)
        lowerTimeInt = "{} {}".format(pgDay, lowerTimeInt)
    
    # Inner-most query: stop_times for trips running on gtfsDay;
    # middle query: aggregate stops/sequence per trip;
    # outer query: group identical stop sequences into routes
    SQL_ROUTES = (
        "SELECT MIN(fid) AS fid, MIN(long_name) AS long_name, "
        "{short_cod}, stops, stops_sequence, {agencyF}, "
        "COALESCE({short_cod}, '') || '|' || "
        "COALESCE(row_number() OVER(PARTITION BY {short_cod} "
        "ORDER BY {short_cod})) AS route_id, "
        "COUNT(stops) AS ncirc{freq} "
        "FROM ("
        "SELECT {stoptripID} AS fid, {short_cod}, {agencyF}, "
        "MIN({long_cod}) AS long_name, "
        "array_agg({stopID} ORDER BY {stoptripID}, {stopSq}) AS stops, "
        "array_agg({stopSq} ORDER BY {stoptripID}, {stopSq}) AS stops_sequence, "
        "MIN(departure_time) AS departure_time "
        "FROM ("
        "SELECT {stopTiTbl}.{stoptripID}, "
        "TO_TIMESTAMP('{dday}' || ' ' || {stopTiTbl}.{dep_time}, "
        "'YYYY-MM-DD HH24:MI:SS') AS departure_time, "
        "{stopTiTbl}.{stopID}, {stopTiTbl}.{stopSq}, "
        "foo.{short_cod}, foo.{long_cod}, "
        "foo.{agencyF} FROM {stopTiTbl} INNER JOIN ("
        "SELECT {tripsTbl}.{tripID}, {routeT}.{short_cod}, "
        "{routeT}.{agencyF}, {routeT}.{long_cod} FROM {tripsTbl} "
        "INNER JOIN {routeT} "
        "ON {tripsTbl}.{tripRoID} = {routeT}.{routeID} "
        "INNER JOIN {calenTbl} "
        "ON {tripsTbl}.{tripServ} = {calenTbl}.{calenServ} "
        "WHERE {calenTbl}.{calenDate} = {day}"
        ") AS foo ON {stopTiTbl}.{stoptripID} = foo.{tripID}"
        ") AS stops "
        "GROUP BY {stoptripID}, {short_cod}, {agencyF} "
        "ORDER BY {stoptripID}, {short_cod}"
        ") AS stops2 {whr}"
        "GROUP BY {short_cod}, stops, stops_sequence, {agencyF} "
        "ORDER BY {short_cod}"
    ).format(
        short_cod=db_schema["ROUTES"]["SHORT"],
        agencyF=db_schema["ROUTES"]["AGENCY"],
        routeT=db_schema["ROUTES"]["TNAME"],
        routeID=db_schema["ROUTES"]["ID"],
        long_cod=db_schema["ROUTES"]["LONG"],
        stoptripID=db_schema["STOP_TIMES"]["TRIP"],
        stopID=db_schema["STOP_TIMES"]["STOP"],
        stopSq=db_schema["STOP_TIMES"]["SEQUENCE"],
        stopTiTbl=db_schema["STOP_TIMES"]["TNAME"],
        dep_time=db_schema["STOP_TIMES"]["DEPARTURE"],
        tripsTbl=db_schema["TRIPS"]["TNAME"],
        tripID=db_schema["TRIPS"]["TRIP"],
        tripRoID=db_schema["TRIPS"]["ROUTE"],
        tripServ=db_schema["TRIPS"]["SERVICE"],
        calenTbl=db_schema["CALENDAR"]["TNAME"],
        calenServ=db_schema["CALENDAR"]["SERVICE"],
        calenDate=db_schema["CALENDAR"]["DATE"],
        day=gtfsDay, dday=pgDay,
        # Optional WHERE clause restricting departures to the time window
        whr=(
            "WHERE departure_time >= "
            "TO_TIMESTAMP('{lwInt}', 'YYYY-MM-DD HH24:MI:SS') "
            "AND departure_time < "
            "TO_TIMESTAMP('{upInt}', 'YYYY-MM-DD HH24:MI:SS') "
        ).format(
            upInt=upperTimeInt, lwInt=lowerTimeInt
        ) if upperTimeInt and lowerTimeInt else "",
        # Optional frequency column: window length in minutes divided by
        # the number of circulations of the route
        freq=(
            ", (((EXTRACT(hour FROM TO_TIMESTAMP('{upInt}', 'YYYY-MM-DD HH24:MI:SS') - "
            "TO_TIMESTAMP('{lwInt}', 'YYYY-MM-DD HH24:MI:SS')) * 60) + "
            "EXTRACT(minute FROM TO_TIMESTAMP('{upInt}', 'YYYY-MM-DD HH24:MI:SS') - "
            "TO_TIMESTAMP('{lwInt}', 'YYYY-MM-DD HH24:MI:SS')) + "
            "(EXTRACT(second FROM TO_TIMESTAMP('{upInt}', 'YYYY-MM-DD HH24:MI:SS') - "
            "TO_TIMESTAMP('{lwInt}', 'YYYY-MM-DD HH24:MI:SS')) / 60)) / COUNT(stops)"
            ") AS frequencies"
        ).format(
            upInt=upperTimeInt, lwInt=lowerTimeInt
        ) if upperTimeInt and lowerTimeInt else ""
    )
    
    trips_df = q_to_obj(db, SQL_ROUTES, db_api='psql')
    
    return trips_df
def name_circulations(db, GTFS_SCHEMA, OTHER_SCHEMA, output, other_db=None, serviceSchema=None, routeIdColName=None, tripIdColName=None):
    """
    Get all circulations from GTFS and associate these circulations to
    other meta columns of other database
    
    GTFS_SCHEMA = {
        "TNAME"     : "stop_times",
        "TRIP"      : "trip_id",
        "STOP"      : "stop_id",
        "SEQUENCE"  : "stop_sequence",
        "DEPARTURE" : "departure_time"
    }
    
    OTHER_SCHEMA = {
        "TNAME"    : "percursos_geom_v2",
        "ROUTE"    : ["carreira", "variante", "sentido"],
        "SEQUENCE" : "ordem",
        "STOP"     : "paragem"
    }
    
    serviceSchema = {
        "TRIPS" : {
            "TNAME"   : "trips",
            "TRIP"    : "trip_id",
            "SERVICE" : "service_id"
        },
        "CALENDAR" : {
            "TNAME"   : "calendar_dates",
            "SERVICE" : "service_id",
            "DATE"    : "date"
        },
        "FILTER_DAY" : 20180308
    }
    
    The association is done by exact match of the (sanitized) stop
    sequences of each GTFS circulation and each route in the other table.
    """
    
    import os
    from glass.pys import obj_to_lst
    from glass.ng.sql.q import q_to_obj
    
    # Route metadata may live in the same or in another database
    other_db = db if not other_db else other_db
    
    # Sanitize Route ID in Other Schema
    OTHER_SCHEMA_ROUTE = obj_to_lst(OTHER_SCHEMA["ROUTE"])
    
    if len(OTHER_SCHEMA_ROUTE) > 1:
        from glass.ng.sql.col import txt_cols_to_col
        
        ROUTE_COL = routeIdColName if routeIdColName else "fid_route"
        
        # Concatenate the several route-id columns into a single one
        txt_cols_to_col(other_db, OTHER_SCHEMA["TNAME"], OTHER_SCHEMA_ROUTE, "|", ROUTE_COL)
    else:
        ROUTE_COL = routeIdColName if routeIdColName else \
            OTHER_SCHEMA_ROUTE[0]
    
    """
    Get all circulations in GTFS and their start time
    """
    if serviceSchema:
        serviceSchema["FILTER_DAY"] = obj_to_lst(serviceSchema["FILTER_DAY"])
    
    # Optional day filter over the calendar table
    where = "" if not serviceSchema else (" WHERE {} ").format(" OR ".join([
        "{}.{} = {}".format(
            serviceSchema["CALENDAR"]["TNAME"],
            serviceSchema["CALENDAR"]["DATE"], d
        ) for d in serviceSchema["FILTER_DAY"]
    ]))
    
    # Optional join restricting stop_times to trips serviced on the
    # filtered day(s)
    injoinQ = "" if not serviceSchema else (
        "INNER JOIN ("
        "SELECT {tripsTbl}.{tripsTripId} "
        "FROM {tripsTbl} INNER JOIN {calenTbl} ON "
        "{tripsTbl}.{tripsServId} = {calenTbl}.{calenServId}{whr} "
        "GROUP BY {tripsTbl}.{tripsTripId}"
        ") AS trip_service ON {stopTimeTbl}.{stopTimeTrip} "
        "= trip_service.{tripsTripId} ").format(
            tripsTbl=serviceSchema["TRIPS"]["TNAME"],
            tripsTripId=serviceSchema["TRIPS"]["TRIP"],
            tripsServId=serviceSchema["TRIPS"]["SERVICE"],
            calenTbl=serviceSchema["CALENDAR"]["TNAME"],
            calenServId=serviceSchema["CALENDAR"]["SERVICE"],
            stopTimeTbl=GTFS_SCHEMA["TNAME"],
            stopTimeTrip=GTFS_SCHEMA["TRIP"],
            whr=where)
    
    newTripCol = tripIdColName if tripIdColName else GTFS_SCHEMA["TRIP"]
    
    # One row per circulation with its ordered stop list and first/last
    # departure times
    Q = (
        "SELECT {stopTimesT}.{tripId} AS {newTrip}, "
        "array_agg({stopTimesT}.{stopId} "
        "ORDER BY {stopTimesT}.{tripId}, {stopTimesT}.{stopSq}) AS stops, "
        "array_agg({stopTimesT}.{stopSq} "
        "ORDER BY {stopTimesT}.{tripId}, {stopTimesT}.{stopSq}) AS stops_order, "
        "MIN({stopTimesT}.{depTime}) AS departure, "
        "MAX({stopTimesT}.{depTime}) AS depar_last_stop "
        "FROM {stopTimesT} {injoin}"
        "GROUP BY {stopTimesT}.{tripId}").format(
            tripId=GTFS_SCHEMA["TRIP"], stopId=GTFS_SCHEMA["STOP"],
            stopSq=GTFS_SCHEMA["SEQUENCE"],
            depTime=GTFS_SCHEMA["DEPARTURE"],
            stopTimesT=GTFS_SCHEMA["TNAME"],
            injoin=injoinQ, newTrip=newTripCol)
    
    circ = q_to_obj(db, Q)
    
    """
    Get all routes metadata in the "Other Database/Table"
    """
    Q = ("SELECT {idRoute}, "
         "array_agg({stopF} ORDER BY {idRoute}, {stopSq}) AS stops, "
         "array_agg({stopSq} ORDER BY {idRoute}, {stopSq}) AS stops_order "
         "FROM {t} GROUP BY {idRoute}").format(
        idRoute=ROUTE_COL, stopF=OTHER_SCHEMA["STOP"],
        stopSq=OTHER_SCHEMA["SEQUENCE"], t=OTHER_SCHEMA["TNAME"])
    
    routes = q_to_obj(other_db, Q)
    
    def sanitizeDf(df, col):
        # Normalise the stringified array literals so GTFS and "other"
        # stop lists compare equal.
        # NOTE(review): str.replace with '[' / ']' assumes a pandas
        # version treating the pattern literally; with regex=True (the
        # historical default) these are invalid regexes on recent
        # pandas - confirm the pinned pandas version.
        df[col] = df[col].astype(str)
        df[col] = df[col].str.replace('L', '')
        df[col] = df[col].str.replace(' ', '')
        df[col] = df[col].str.replace('[', '')
        df[col] = df[col].str.replace(']', '')
        
        return df
    
    circ = sanitizeDf(circ, "stops")
    routes = sanitizeDf(routes, "stops")
    
    # Associate each circulation to a route by exact stop-list match
    newDf = circ.merge(routes, how='inner', left_on="stops", right_on="stops")
    
    if os.path.dirname(output):
        # Write XLS
        from glass.ng.wt import obj_to_tbl
        obj_to_tbl(newDf, output)
    else:
        # Send to pgsql
        from glass.g.wt.sql import df_to_db
        df_to_db(db, newDf, output, api='psql')
    
    return output
def get_not_used_tags(OSM_FILE, OUT_TBL):
    """
    Use a file OSM to detect tags not considered in the OSM2LULC procedure.

    Compares the key/value/geometry tags present in OSM_FILE with the ones
    registered in the osmtolulc.sqlite reference database; tags without a
    match are exported to shapefiles (per geometry type) and listed in
    OUT_TBL.
    """

    import os
    from glass.ng.wt import obj_to_tbl
    from glass.g.tbl.filter import sel_by_attr
    from glass.ng.sql.q import q_to_obj
    from glass.ng.pd.split import df_split
    from glass.pys.oss import fprop
    from glass.g.it.osm import osm_to_gpkg

    OSM_TAG_MAP = {
        "DB" : os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            'osmtolulc.sqlite'
        ),
        "OSM_FEAT" : "osm_features",
        "KEY_COL" : "key",
        "VALUE_COL" : "value",
        "GEOM_COL" : "geom"
    }

    WORKSPACE = os.path.dirname(OUT_TBL)

    sqdb = osm_to_gpkg(OSM_FILE, os.path.join(
        WORKSPACE, fprop(OSM_FILE, 'fn') + '.gpkg'
    ))

    # Get Features we are considering
    ourOSMFeatures = q_to_obj(OSM_TAG_MAP["DB"], (
        "SELECT {key} AS key_y, {value} AS value_y, {geom} AS geom_y "
        "FROM {tbl}"
    ).format(
        key=OSM_TAG_MAP["KEY_COL"], value=OSM_TAG_MAP["VALUE_COL"],
        geom=OSM_TAG_MAP["GEOM_COL"], tbl=OSM_TAG_MAP["OSM_FEAT"]
    ), db_api='sqlite')

    # Get Features in File
    TABLES_TAGS = {
        'points' : ['highway', 'man_made', 'building'],
        'lines'  : ['highway', 'waterway', 'aerialway', 'barrier',
                    'man_made', 'railway'],
        'multipolygons' : ['aeroway', 'amenity', 'barrier', 'building',
                           'craft', 'historic', 'land_area',
                           'landuse', 'leisure', 'man_made', 'military',
                           'natural', 'office', 'place', 'shop',
                           'sport', 'tourism', 'waterway', 'power',
                           'railway', 'healthcare', 'highway']
    }

    Qs = [
        " UNION ALL ".join([(
            "SELECT '{keycol}' AS key, {keycol} AS value, "
            "'{geomtype}' AS geom FROM {tbl} WHERE "
            "{keycol} IS NOT NULL"
        ).format(
            keycol=c,
            geomtype='Point' if table == 'points' else 'Line' \
                if table == 'lines' else 'Polygon',
            tbl=table
        ) for c in TABLES_TAGS[table]]) for table in TABLES_TAGS
    ]

    fileOSMFeatures = q_to_obj(sqdb, (
        "SELECT key, value, geom FROM ({}) AS foo "
        "GROUP BY key, value, geom"
    ).format(" UNION ALL ".join(Qs)), db_api='sqlite')

    _fileOSMFeatures = fileOSMFeatures.merge(
        ourOSMFeatures, how='outer',
        left_on=["key", "value", "geom"],
        right_on=["key_y", "value_y", "geom_y"]
    )

    # Select OSM Features of file without correspondence
    _fileOSMFeatures["isnew"] = _fileOSMFeatures.key_y.fillna(value='nenhum')

    # .copy() so the column assignments below operate on an independent
    # frame instead of a view of _fileOSMFeatures
    newTags = _fileOSMFeatures[_fileOSMFeatures.isnew == 'nenhum'].copy()

    # Escape single quotes for use inside SQL string literals
    newTags["value"] = newTags.value.str.replace("'", "''")

    newTags["whr"] = newTags.key + "='" + newTags.value + "'"

    # Export tags not being used to new shapefile
    def to_regular_str(row):
        # NOTE(review): currently a pass-through (whr_san is written but the
        # caller still reads row.whr) — looks like an unfinished
        # string-sanitization hook; confirm intent before extending
        san_str = row.whr

        row["whr_san"] = san_str

        return row

    # Unformatted template: it must be filled PER CHUNK below. The previous
    # version pre-formatted it with the whole filterDf filter and then
    # called .format() again (a no-op on a placeholder-free string), which
    # silently defeated the 500-row chunking.
    Q_TMPL = "SELECT * FROM {} WHERE {}"

    for t in TABLES_TAGS:
        if t == 'points':
            filterDf = newTags[newTags.geom == 'Point']
        elif t == 'lines':
            filterDf = newTags[newTags.geom == 'Line']
        elif t == 'multipolygons':
            filterDf = newTags[newTags.geom == 'Polygon']

        # Split in chunks to keep each WHERE clause to a manageable size
        if filterDf.shape[0] > 500:
            dfs = df_split(filterDf, 500, nrows=True)
        else:
            dfs = [filterDf]

        for i, df in enumerate(dfs, start=1):
            fn = t + '.shp' if len(dfs) == 1 else '{}_{}.shp'.format(t, i)

            try:
                shp = sel_by_attr(sqdb, Q_TMPL.format(
                    t, df.whr.str.cat(sep=" OR ")
                ), os.path.join(WORKSPACE, fn), api_gis='ogr')
            except Exception:
                # Retry after the (placeholder) row sanitization
                __df = df.apply(lambda x: to_regular_str(x), axis=1)

                shp = sel_by_attr(sqdb, Q_TMPL.format(
                    t, __df.whr.str.cat(sep=" OR ")
                ), os.path.join(WORKSPACE, fn))

    # Export OUT_TBL with tags not being used
    newTags.drop(['key_y', 'value_y', 'geom_y', 'isnew', 'whr'],
                 axis=1, inplace=True)
    obj_to_tbl(newTags, OUT_TBL, sheetsName="new_tags", sanitizeUtf8=True)

    return OUT_TBL
from glass.ng.sql.q import q_to_obj from glass.g.wg.gsrv.ws import create_ws from glass.g.wg.gsrv.stores import create_pgstore from glass.g.wg.gsrv.lyrs import pub_pglyr if __name__ == '__main__': # Parameters data_db = 'eow-db-contents' ghost_db = 'flainar-db-geoserver' ws = 'eowdeploy' store = 'ghost' # GeoServer Layers table gsrvlyr = 'geoserverlayers' setupdb = 'flainar' # Get Layers glyr = q_to_obj(data_db, "SELECT name FROM {}".format(gsrvlyr), dbset=setupdb) # Create workspace and store in GeoServer create_ws(ws, overwrite=True) create_pgstore(store, ws, ghost_db, dbset=setupdb + '-gsrv') # Create Layers in geoserver for idx, row in glyr.iterrows(): pub_pglyr(ws, store, row['name'], title='tt_' + row['name'])
def num_selbyarea(db, polyTbl, folder, cellsize, srscode, rstTemplate,
                  UPPER=True, api='SQLITE'):
    """
    Select features with area upper (UPPER=True) or lower (UPPER=False)
    than a per-row threshold; the threshold column (t_<rule>) must already
    exist in the database table.

    For every LULC class with at least one matching feature, the selection
    is exported to a shapefile and rasterized (one worker thread per class).

    Returns (dict class_id -> raster path, dict with timing records).
    """

    import datetime as dt
    import os
    from threading import Thread
    from glass.ng.sql.q import q_to_obj

    if api == 'SQLITE':
        from glass.g.tbl.filter import sel_by_attr
    else:
        from glass.g.it.shp import dbtbl_to_shp as sel_by_attr
    from glass.g.dp.torst import shp_to_rst
    from glass.ete.osm2lulc import GEOM_AREA

    # Rule column and SQL comparison operator follow the UPPER flag
    rule_col = 'area_upper' if UPPER else 'area_lower'
    op = " > " if UPPER else " < "
    dbapi = 'psql' if api == 'POSTGIS' else 'sqlite'

    # Which LULC classes have features passing the area test
    started = dt.datetime.now().replace(microsecond=0)
    cls_list = q_to_obj(
        db,
        f"SELECT {rule_col} FROM {polyTbl} WHERE "
        f"{GEOM_AREA} {op} t_{rule_col} GROUP BY {rule_col}",
        db_api=dbapi
    )[rule_col].tolist()
    ended = dt.datetime.now().replace(microsecond=0)

    timings = {0: ('check_cls', ended - started)}
    rst_by_cls = {}

    def _sel_and_export(cls, cnt):
        # Worker: select one class into a shapefile, then rasterize it;
        # results go into the shared dicts under thread-unique keys
        t_sel = dt.datetime.now().replace(microsecond=0)

        whr = f"{GEOM_AREA} {op} t_{rule_col} AND {rule_col}={cls}"
        sql = f"SELECT geometry, {cls} AS cls FROM {polyTbl} WHERE {whr}"
        shp_path = os.path.join(folder, f"{rule_col}_{cls}.shp")

        if api == "SQLITE":
            cls_shp = sel_by_attr(db, sql, shp_path, api_gis='ogr')
        else:
            cls_shp = sel_by_attr(
                db, sql, "geometry", shp_path,
                api='pgsql2shp', tableIsQuery=True)

        t_shp = dt.datetime.now().replace(microsecond=0)

        rst = shp_to_rst(
            cls_shp, None, cellsize, 0,
            os.path.join(folder, f"{rule_col}_{cls}.tif"),
            epsg=srscode, rst_template=rstTemplate, api='gdal')

        t_rst = dt.datetime.now().replace(microsecond=0)

        rst_by_cls[int(cls)] = rst
        timings[cnt + 1] = (f'sq_to_shp_{cls}', t_shp - t_sel)
        timings[cnt + 2] = (f'shp_to_rst_{cls}', t_rst - t_shp)

    workers = [
        Thread(
            name=f"area-tk{cls}", target=_sel_and_export,
            args=(cls, (idx + 1) * 10)
        ) for idx, cls in enumerate(cls_list)
    ]

    for w in workers:
        w.start()
    for w in workers:
        w.join()

    return rst_by_cls, timings