示例#1
0
文件: to.py 项目: jasp382/gasp
def tbl_fromdb_todb(from_db, to_db, tables, qForTbl=None, api='pandas'):
    """
    Send PGSQL Tables from one database to other
    
    Copies every table in 'tables' from 'from_db' into 'to_db'. With
    api='pandas' each table travels through a DataFrame; with api='psql'
    a SQL dump/restore is used instead. Any other api value falls back
    to 'pandas'. qForTbl optionally maps a table name to the query used
    to read it.
    """
    
    from gasp.pyt import obj_to_lst
    
    # Unknown api keywords fall back to the pandas strategy
    if api not in ('pandas', 'psql'):
        api = 'pandas'
    
    tables = obj_to_lst(tables)
    
    if api == 'pandas':
        from gasp.sql.fm import q_to_obj
        
        for tbl in tables:
            # Use the caller-supplied query when one exists for this
            # table; otherwise fetch every row
            if qForTbl and tbl in qForTbl:
                dataDf = q_to_obj(from_db, qForTbl[tbl], db_api='psql')
            
            else:
                dataDf = q_to_obj(from_db, "SELECT * FROM {}".format(
                    tbl), db_api='psql')
            
            df_to_db(to_db, dataDf, tbl, api='psql')
    
    else:
        import os
        from gasp.pyt.oss import mkdir, del_folder
        from gasp.sql.fm  import dump_tbls
        from gasp.sql.to  import restore_tbls
        
        # Temporary workspace for the SQL dump file
        tmpFolder = mkdir(
            os.path.dirname(os.path.abspath(__file__)), randName=True
        )
        
        # Dump tables from the source database
        sqlScript = dump_tbls(from_db, tables, os.path.join(
            tmpFolder, "tables_data.sql"
        ))
        
        # Restore them in the target database
        restore_tbls(to_db, sqlScript, tables)
        
        del_folder(tmpFolder)
示例#2
0
def split_table_by_col_distinct(db, tbl, col):
    """
    Create a new table for each value in one column
    
    For every distinct value V of 'col' in 'tbl', creates a new table
    named '<tbl>_<V>' holding the rows where col equals V.
    """
    
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i  import cols_type
    from gasp.sql.to import q_to_ntbl
    
    fields_types = cols_type(db, tbl)
    
    # Get unique values
    VALUES = q_to_obj(db,
        "SELECT {col} FROM {t} GROUP BY {col}".format(
            col=col, t=tbl
        ), db_api='psql'
    )[col].tolist()
    
    # Quote the value in the WHERE clause only for text columns
    whr = '{}=\'{}\'' if fields_types[col] == str else '{}={}'
    
    for row in VALUES:
        # FIX: tolist() of a single column yields scalars, so the value
        # itself is used here — the old row[0] sliced the first character
        # of strings and raised TypeError for numeric values
        q_to_ntbl(
            db, '{}_{}'.format(tbl, str(row)),
            "SELECT * FROM {} WHERE {}".format(
                tbl, whr.format(col, str(row))
        ), api='psql')
示例#3
0
文件: time.py 项目: jasp382/gasp
    def thereIsRowsSameTimeInt(row):
        """
        Flag *row* when other rows with the same column values occur
        within TIME_TOLERANCE of each other (decimal hours).
        
        Adds a 'time_difference' field to the row: 1 when two matching
        rows are closer in time than TIME_TOLERANCE, else 0. Relies on
        closure variables: db, table, COLS, COLS_TYPE,
        hour_decimal_field, TIME_TOLERANCE and q_to_obj.
        """
        # Build the WHERE clause matching this row's values; string
        # columns are quoted, numeric ones are not
        whr = []
        for c in COLS:
            if COLS_TYPE[c] == str:
                whr.append("{}='{}'".format(c, row[c]))
            else:
                whr.append("{}={}".format(c, row[c]))

        # Decimal-hour values of every row matching this entity
        hourRows = q_to_obj(db,
                            "SELECT {} FROM {} WHERE {}".format(
                                hour_decimal_field, table, " AND ".join(whr)),
                            db_api='psql')[hour_decimal_field].tolist()

        # Pairwise comparison; bail out at the first pair inside the
        # tolerance.
        # NOTE(review): if fewer than two rows match, 'dif' is never
        # assigned and the checks below raise NameError — confirm the
        # caller guarantees at least two matching rows.
        # NOTE(review): hourRows[i][0] indexes into each element — this
        # assumes q_to_obj yields a sequence per row; a single-column
        # tolist() usually yields scalars, so verify against q_to_obj.
        for i in range(len(hourRows)):
            for e in range(i + 1, len(hourRows)):
                dif = abs(hourRows[i][0] - hourRows[e][0])

                if dif < TIME_TOLERANCE:
                    break

            if dif < TIME_TOLERANCE:
                break

        if dif < TIME_TOLERANCE:
            row['time_difference'] = 1
        else:
            row['time_difference'] = 0

        return row
示例#4
0
文件: to.py 项目: jasp382/gasp
def db_to_db(db_a, db_b, typeDBA, typeDBB):
    """
    All tables in one Database to other database
    
    Useful when we want to migrate a SQLITE DB to a PostgreSQL DB
    
    typesDB options:
    * sqlite
    * psql
    """
    
    import os
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i  import lst_tbl
    from gasp.sql.db import create_db
    
    # Tables available in the source database (views excluded)
    srcTables = lst_tbl(db_a, excludeViews=True, api=typeDBA)
    
    # Make sure the target database exists
    db_b = create_db(db_b, overwrite=False, api=typeDBB)
    
    # Copy each table through a DataFrame
    for t in srcTables:
        tblDf = q_to_obj(db_a, "SELECT * FROM {}".format(t), db_api=typeDBA)
        
        df_to_db(db_b, tblDf, t, append=None, api=typeDBB)
示例#5
0
文件: toxls.py 项目: jasp382/gasp
def bar_chart_by_query(db, query, xaxis_col, chart_data_fld_name, series,
                       outxls):
    """
    Execute some query for each serie in series.
    Produce a bar chart with the data returned.
    Each serie will be new bar in the chart.
    So, the query statment must have a white space
    to put each serie in series... E.g.:
    "SELECT fld_name FROM t WHERE series_fld={}".format(serie in series)
    """

    from gasp.sql.fm import q_to_obj
    from gasp.pyt.df.joins import combine_dfs
    from gasp.pyt.xls.charts import barchart_in_xls_from_df

    # One DataFrame per serie; the data column is renamed after the serie
    seriesDfs = []
    for s in series:
        sdf = q_to_obj(db, query.format(s), db_api='psql')
        sdf.rename(columns={chart_data_fld_name: s}, inplace=True)

        seriesDfs.append(sdf)

    # Join every serie on the x-axis column
    main_df = combine_dfs(seriesDfs[0], seriesDfs[1:], xaxis_col)

    # Write the chart to the XLS file
    outxls = barchart_in_xls_from_df(main_df, outxls, series, xaxis_col)

    return outxls
示例#6
0
文件: to.py 项目: jasp382/gasp
def apndtbl_in_otherdb(db_a, db_b, tblA, tblB, mapCols,
                       geomCol=None, srsEpsg=None):
    """
    Append data of one table to another table in other database.
    
    Reads the mapCols keys from tblA in db_a, renames them to the
    mapCols values and appends the result to tblB in db_b. When geomCol
    and srsEpsg are given, the geometry column is carried along.
    
    Returns tblB.
    """
    
    from gasp.sql.fm import q_to_obj
    
    cols = ", ".join(list(mapCols.keys()))
    
    if geomCol and srsEpsg:
        df = q_to_obj(db_a, "SELECT {} FROM {}".format(
            cols, tblA
        ), db_api='psql', geomCol=geomCol, epsg=srsEpsg)
    
    else:
        # FIX: the source data must come from db_a — it was previously
        # read from db_b, where tblA does not necessarily exist
        df = q_to_obj(db_a, "SELECT {} FROM {}".format(
            cols, tblA
        ), db_api='psql', geomCol=None, epsg=None)
    
    # Rename columns to the target table's names
    df.rename(columns=mapCols, inplace=True)
    
    # The geometry column may have been renamed too
    if geomCol:
        for k in mapCols:
            if geomCol == k:
                geomCol = mapCols[k]
                break
    
    # Send data to the other database
    if geomCol and srsEpsg:
        from gasp.gt.prop.feat import get_gtype
        
        # Geometry type is needed to append into a geometric table
        gType = get_gtype(df, geomCol=geomCol, gisApi='pandas')
        
        df_to_db(
            db_b, df, tblB, append=True, api='psql', epsg=srsEpsg,
            geomType=gType, colGeom=geomCol
        )
    
    else:
        df_to_db(db_b, df, tblB, append=True, api='psql')
    
    return tblB
示例#7
0
def tbl_geomtype(db, table, geomCol='geom'):
    """
    Return the number of geometry types in table
    """
    
    from gasp.sql.fm import q_to_obj
    
    # The inner SELECT produces one row per distinct geometry type;
    # the outer one counts them
    Q = (
        "SELECT COUNT(*) AS row_count FROM ("
            "SELECT ST_GeometryType((ST_Dump({})).geom) AS cnt_geom "
            "FROM {} GROUP BY ST_GeometryType((ST_Dump({})).geom)"
        ") AS foo"
    ).format(geomCol, table, geomCol)
    
    cntDf = q_to_obj(db, Q, db_api='psql')
    
    return int(cntDf.iloc[0].row_count)
示例#8
0
def split_df_in_groups(df, group_size):
    """
    Split a DataFrame into consecutive, non-empty slices with at most
    group_size rows each. An empty DataFrame yields no groups.
    
    Helper for split_table_entity_number.
    """
    return [
        df.iloc[start : start + group_size]
        for start in range(0, df.shape[0], group_size)
    ]


def split_table_entity_number(db, table, entity_field, entity_number):
    """
    Split tables in several using as reference a number of entities per table
    
    If a table has 1 000 000 entities and the entity_number is 250 000,
    this method will create four tables, each one with 250 000 entities.
    250 000 entities, not rows. Don't forget that the main table may have
    more than one reference to the same entity.
    """
    
    import pandas
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i  import cols_type
    from gasp.sql.to import q_to_ntbl
    
    # Select entities in table
    entities = q_to_obj(db, "SELECT {c} FROM {t} GROUP BY {c}".format(
        c=entity_field, t=table
    ), db_api='psql')
    
    # Split entities into groups according to entity_number
    # (FIX: the previous while-loop could append a trailing EMPTY group,
    # which produced an invalid query with an empty WHERE clause)
    entityGroup = split_df_in_groups(entities, entity_number)
    
    # For each dataframe, create a new table
    COLS_TYPE = cols_type(db, table)
    
    # Text entities must be quoted in the WHERE clause
    cond = "{}='{}'" if COLS_TYPE[entity_field] == str else "{}={}"
    
    for c, df in enumerate(entityGroup):
        # Build the clause without mutating the slice (avoids pandas
        # SettingWithCopy issues the old in-place edits triggered)
        whr = ' OR '.join(
            cond.format(entity_field, v)
            for v in df[entity_field].astype(str)
        )
        
        q_to_ntbl(db, '{}_{}'.format(table, str(c)), (
            "SELECT * FROM {} WHERE {}"
        ).format(table, whr), api='psql')
示例#9
0
    def build12_torst(buildTbl):
        """
        Export each building class in *buildTbl* to SHP and rasterize it.
        
        Relies on closure variables: osmdb, apidb, folder, cells,
        srscode, rstT, resLyr, timeGasto, sel_by_attr, shp_to_rst,
        dt and os. Rasters go into resLyr (class -> [raster]) and
        per-step timings into timeGasto.
        """
        # Classes present in the buildings table
        LulcCls = q_to_obj(
            osmdb,
            "SELECT cls FROM {} GROUP BY cls".format(buildTbl),
            db_api='psql' if apidb == 'POSTGIS' else 'sqlite').cls.tolist()

        for lulc_cls in LulcCls:
            time_m = dt.datetime.now().replace(microsecond=0)

            # To SHP
            # SQLite source: export via OGR; PostGIS: via pgsql2shp
            if apidb == 'SQLITE':
                shpB = sel_by_attr(osmdb,
                                   "SELECT * FROM {} WHERE cls={}".format(
                                       buildTbl, str(lulc_cls)),
                                   os.path.join(
                                       folder,
                                       'nshp_build_{}.shp'.format(lulc_cls)),
                                   api_gis='ogr')

            else:
                shpB = sel_by_attr(osmdb,
                                   "SELECT * FROM {} WHERE cls={}".format(
                                       buildTbl, str(lulc_cls)),
                                   "geometry",
                                   os.path.join(
                                       folder,
                                       'nshp_build_{}.shp'.format(lulc_cls)),
                                   api='pgsql2shp',
                                   tableIsQuery=True)
            time_n = dt.datetime.now().replace(microsecond=0)

            # To RST (GDAL rasterization using the template raster rstT)
            brst = shp_to_rst(shpB,
                              None,
                              cells,
                              0,
                              os.path.join(
                                  folder,
                                  'nrst_build_{}.tif'.format(lulc_cls)),
                              srscode,
                              rstT,
                              api='gdal')
            time_o = dt.datetime.now().replace(microsecond=0)

            resLyr[int(lulc_cls)] = [brst]

            # NOTE(review): consecutive class codes make these timing
            # keys collide (class N+1 overwrites N's 'to_rst' entry) —
            # confirm whether that is intended
            timeGasto[int(lulc_cls)] = ('to_shp_{}'.format(str(lulc_cls)),
                                        time_n - time_m)
            timeGasto[int(lulc_cls) + 1] = ('to_rst_n_{}'.format(
                str(lulc_cls)), time_o - time_n)
示例#10
0
def count_entity_periods_with_certain_duration(db,
                                               PERIOD_INTERVAL,
                                               PGTABLE,
                                               TIME_FIELD,
                                               ENTITY_FIELD,
                                               OUT_TABLE,
                                               filterWhere=None):
    """
    Count rows in a pgtable for a given period of X minutes for each
    interest entity
    
    PERIOD_INTERVAL = "01:00:00"
    """

    import pandas
    from gasp.pyt.tm import day_to_intervals2
    from gasp.pyt.df.joins import combine_dfs

    # Get Intervals
    INTERVALS = day_to_intervals2(PERIOD_INTERVAL)

    # For each interval/period, count the number of rows by entity
    counting = []
    for itv in INTERVALS:
        lowerB, upperB = itv[0], itv[1]

        # Column name encodes the interval, e.g. s08_00_e09_00
        cntName = "s{}_e{}".format(lowerB[:5], upperB[:5]).replace(":", "_")

        Q = ("SELECT {entityCol}, COUNT({entityCol}) AS {cntCol} "
             "FROM {table} WHERE "
             "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') >= "
             "TO_TIMESTAMP('{minLower}', 'HH24:MI:SS') AND "
             "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') < "
             "TO_TIMESTAMP('{minUpper}', 'HH24:MI:SS'){whr} "
             "GROUP BY {entityCol}").format(
                 cntCol=cntName,
                 table=PGTABLE,
                 timeCol=TIME_FIELD,
                 entityCol=ENTITY_FIELD,
                 minLower=lowerB,
                 minUpper=upperB,
                 whr="" if not filterWhere else
                     " AND ({}) ".format(filterWhere))

        counting.append(q_to_obj(db, Q, db_api='psql'))

    # Join the per-interval counts on the entity column
    mainDf = combine_dfs(counting[0], counting[1:], ENTITY_FIELD)

    obj_to_tbl(mainDf, OUT_TABLE)

    return OUT_TABLE
示例#11
0
def distinct_val(db, pgtable, column):
    """
    Get distinct values in one column of one pgtable
    """
    
    from gasp.pyt    import obj_to_lst
    from gasp.sql.fm import q_to_obj
    
    cols = ", ".join(obj_to_lst(column))
    
    # GROUP BY yields one row per distinct value combination
    distinctDf = q_to_obj(db,
        "SELECT {col} FROM {t} GROUP BY {col};".format(
            col=cols, t=pgtable
        ), db_api='psql'
    )
    
    return distinctDf.to_dict(orient="records")
示例#12
0
文件: i.py 项目: jasp382/gasp
def max_id_or_zero(fid):
    """
    Normalize a MAX() result: None/NaN (empty table) and falsy values
    become 0; any other id is returned unchanged.
    
    Helper for check_last_id.
    """
    # NaN is the only value that is not equal to itself
    if fid is None or fid != fid or not fid:
        return 0
    
    return fid


def check_last_id(db, pk, table):
    """
    Check last ID of a given table
    
    return 0 if there is no data
    
    db    -- database name/connection parameters
    pk    -- primary key (id) column of the table
    table -- table to inspect
    """

    from gasp.sql.fm import q_to_obj

    q = "SELECT MAX({}) AS fid FROM {}".format(pk, table)

    # FIX: for an empty table MAX() returns NULL, which pandas turns
    # into NaN; 'not NaN' is False, so the old check leaked NaN to the
    # caller instead of the documented 0
    d = q_to_obj(db, q, db_api='psql').fid.tolist()

    return max_id_or_zero(d[0])
示例#13
0
def grs_rst(db, polyTbl, api='SQLITE'):
    """
    Simple selection, convert result to Raster
    """
    
    import datetime
    from gasp.sql.fm      import q_to_obj
    from gasp.gt.toshp.db import dbtbl_to_shp as db_to_grs
    from gasp.gt.torst    import shp_to_rst
    
    def _now():
        # Timestamps without microseconds keep the timing log readable
        return datetime.datetime.now().replace(microsecond=0)
    
    # Get the classes with data in the selection column
    time_a = _now()
    lulcCls = q_to_obj(db, (
        "SELECT selection FROM {} "
        "WHERE selection IS NOT NULL "
        "GROUP BY selection"
    ).format(polyTbl), db_api='psql' if api == 'POSTGIS' else 'sqlite').selection.tolist()
    time_b = _now()
    
    timeGasto = {0 : ('check_cls', time_b - time_a)}
    
    # Import data into GRASS and convert it to raster
    clsRst = {}
    tk = 1
    for cls in lulcCls:
        clsStr = str(cls)
        
        time_x = _now()
        grsVect = db_to_grs(
            db, polyTbl, "geometry", "rule1_{}".format(clsStr),
            inDB='psql' if api == 'POSTGIS' else 'sqlite',
            where="selection = {}".format(clsStr), notTable=True,
            filterByReg=True, outShpIsGRASS=True
        )
        time_y = _now()
        
        grsRst = shp_to_rst(
            grsVect, int(cls), None, None, "rst_rule1_{}".format(clsStr),
            api='grass'
        )
        time_z = _now()
        
        clsRst[int(cls)] = grsRst
        timeGasto[tk]    = ('import_{}'.format(cls), time_y - time_x)
        timeGasto[tk+1]  = ('torst_{}'.format(cls), time_z - time_y)
        
        tk += 2
    
    return clsRst, timeGasto
示例#14
0
def count_by_periods_with_certain_duration(db,
                                           PERIOD_INTERVAL,
                                           pgtable,
                                           TIME_FIELD,
                                           outTable,
                                           filterWhere=None):
    """
    Count rows in a pgtable by periods of X minutes
    
    PERIOD_INTERVAL = "01:00:00"
    
    Each output row is one interval of the day, indexed 'HH:MM-HH:MM',
    with the number of table rows whose TIME_FIELD falls inside it.
    """

    import pandas
    from gasp.pyt.tm import day_to_intervals2

    # Get Intervals
    INTERVALS = day_to_intervals2(PERIOD_INTERVAL)

    # For each interval/period, count the number of rows
    counting = []
    for _int_ in INTERVALS:
        QUERY = ("SELECT COUNT(*) AS count FROM {table} WHERE "
                 "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') >= "
                 "TO_TIMESTAMP('{minLower}', 'HH24:MI:SS') AND "
                 "TO_TIMESTAMP({timeCol}, 'HH24:MI:SS') < "
                 "TO_TIMESTAMP('{minUpper}', 'HH24:MI:SS'){whr}").format(
                     table=pgtable,
                     timeCol=TIME_FIELD,
                     minLower=_int_[0],
                     minUpper=_int_[1],
                     whr=""
                     if not filterWhere else " AND ({})".format(filterWhere))

        count = q_to_obj(db, QUERY, db_api='psql')

        # Index label identifies the interval, e.g. '08:00-09:00'
        count.rename(index={0: "{}-{}".format(_int_[0][:5], _int_[1][:5])},
                     inplace=True)

        counting.append(count)

    # FIX: DataFrame.append was removed in pandas 2.0 — concatenate the
    # per-interval frames instead (same result, keeps the index labels)
    result = pandas.concat(counting, ignore_index=False) if counting else None

    obj_to_tbl(result, outTable)

    return outTable
示例#15
0
文件: duplicate.py 项目: jasp382/gasp
def show_duplicates_in_xls(db_name, table, pkCols, outFile, tableIsQuery=None):
    """
    Find duplicates and write these objects in a table
    """

    import pandas
    from gasp.pyt import obj_to_lst
    from gasp.sql.fm import q_to_obj
    from gasp.to import obj_to_tbl

    pkCols = obj_to_lst(pkCols)

    if not pkCols:
        raise ValueError("pkCols value is not valid")

    interestCols = ", ".join(pkCols)

    if not tableIsQuery:
        # Join the table with the value-groups occurring more than once
        joinCond = " AND ".join(
            "{t}.{c} = foo.{c}".format(t=table, c=col) for col in pkCols
        )

        q = ("SELECT {t}.* FROM {t} INNER JOIN ("
             "SELECT {cls}, COUNT({cnt}) AS conta FROM {t} "
             "GROUP BY {cls}"
             ") AS foo ON {rel} "
             "WHERE conta > 1").format(
                 t=table, cls=interestCols, cnt=pkCols[0], rel=joinCond)

    else:
        # 'table' is itself a query: wrap it as a sub-query on both
        # sides of the join
        joinCond = " AND ".join(
            "foo.{c} = jt.{c}".format(c=x) for x in pkCols
        )

        q = ("SELECT foo.* FROM ({q_}) AS foo INNER JOIN ("
             "SELECT {cls}, COUNT({cnt}) AS conta "
             "FROM ({q_}) AS foo2 GROUP BY {cls}"
             ") AS jt ON {rel} "
             "WHERE conta > 1").format(
                 q_=table, cls=interestCols, cnt=pkCols[0], rel=joinCond)

    data = q_to_obj(db_name, q, db_api='psql')

    obj_to_tbl(data, outFile)

    return outFile
示例#16
0
文件: i.py 项目: jasp382/gasp
def row_num(db, table, where=None, api='psql'):
    """
    Return the number of rows in Query
    
    API's Available:
    * psql;
    * sqlite;
    """

    from gasp.sql.fm import q_to_obj

    # 'table' may be a plain table name or a complete SELECT statement
    if table.startswith('SELECT '):
        Q = "SELECT COUNT(*) AS nrows FROM ({}) AS foo".format(table)
    else:
        Q = "SELECT COUNT(*) AS nrows FROM {}{}".format(
            table, "" if not where else " WHERE {}".format(where))

    countDf = q_to_obj(db, Q, db_api=api)

    return int(countDf.iloc[0].nrows)
示例#17
0
文件: i.py 项目: jasp382/gasp
def cols_name(dbname, table, sanitizeSpecialWords=True, api='psql'):
    """
    Return the columns names of a table in one Database
    
    Supported api values: 'psql', 'sqlite' and 'mysql'.
    With api='psql' and sanitizeSpecialWords, PostgreSQL reserved words
    are returned double-quoted so they are directly usable in SQL.
    """

    if api == 'psql':
        c = sqlcon(dbname, sqlAPI='psql')

        try:
            cursor = c.cursor()
            cursor.execute("SELECT * FROM {} LIMIT 1;".format(table))
            colnames = [desc[0] for desc in cursor.description]
        finally:
            # FIX: the connection was never closed (resource leak)
            c.close()

        if sanitizeSpecialWords:
            from gasp.cons.psql import PG_SPECIAL_WORDS

            # Reserved words must be double-quoted to be usable in SQL
            for i in range(len(colnames)):
                if colnames[i] in PG_SPECIAL_WORDS:
                    colnames[i] = '"{}"'.format(colnames[i])

    elif api == 'sqlite':
        import sqlite3

        con = sqlite3.connect(dbname)

        try:
            cursor = con.execute("SELECT * FROM {} LIMIT 1".format(table))

            colnames = list(map(lambda x: x[0], cursor.description))
        finally:
            # FIX: the connection was never closed (resource leak)
            con.close()

    elif api == 'mysql':
        from gasp.sql.fm import q_to_obj

        data = q_to_obj(dbname,
                        "SELECT * FROM {} LIMIT 1".format(table),
                        db_api='mysql')

        colnames = data.columns.values

    else:
        raise ValueError('API {} is not available'.format(api))

    return colnames
示例#18
0
文件: i.py 项目: jasp382/gasp
def lst_views(db, schema='public', basename=None):
    """
    List Views in database
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.fm import q_to_obj

    basename = obj_to_lst(basename)

    # Optional filter: the view name must contain one of the basenames
    if not basename:
        basenameStr = ""
    else:
        basenameStr = "{}".format(" OR ".join(
            ["{} LIKE '%%{}%%'".format("table_name", b) for b in basename]))

    Q = ("SELECT table_name FROM information_schema.views "
         "WHERE table_schema='{}'{}").format(
             schema, "" if not basename else " AND ({})".format(basenameStr))

    views = q_to_obj(db, Q, db_api='psql')

    return views.table_name.tolist()
示例#19
0
def tbl_ext(db, table, geomCol):
    """
    Return extent of the geometries in one pgtable
    
    Result is a list: [left, bottom, right, top].
    """
    
    from gasp.sql.fm import q_to_obj
    
    # Dump every vertex of every geometry and take min/max of X and Y
    q = (
        "SELECT MIN(ST_X(pnt_geom)) AS eleft, MAX(ST_X(pnt_geom)) AS eright, "
        "MIN(ST_Y(pnt_geom)) AS bottom, MAX(ST_Y(pnt_geom)) AS top "
        "FROM ("
            "SELECT (ST_DumpPoints({geomcol})).geom AS pnt_geom "
            "FROM {tbl}"
        ") AS foo"
    ).format(tbl=table, geomcol=geomCol)
    
    extDict = q_to_obj(db, q, db_api='psql').to_dict(orient='index')[0]
    
    return [extDict[k] for k in ('eleft', 'bottom', 'eright', 'top')]
示例#20
0
def get_osm_feat_by_rule(nomenclature):
    """
    Return OSM Features By rule
    
    Joins the OSM features table with the nomenclature's OSM relation
    table and the rules table, returning one row per related feature
    with its LULC class, rule name and the buffer/area parameters.
    
    Table and column names come from the module-level DB_SCHEMA mapping;
    data is read from the PROCEDURE_DB SQLite database.
    Returns the result as produced by q_to_obj (db_api='sqlite').
    """
    
    from gasp.sql.fm import q_to_obj
    
    # Inner query joins the nomenclature's OSM relation with the rules
    # table; the outer query joins that back to the OSM features table
    # through the OSM id
    Q = (
        "SELECT jtbl.{rellulccls} AS {rellulccls}, "
        "{osmfeat}.{key} AS {key}, {osmfeat}.{val} AS {val}, "
        "jtbl.{ruleName} AS {ruleName}, jtbl.{bufferCol} "
        "AS {bufferCol}, jtbl.{areaCol} AS {areaCol} "
        "FROM {osmfeat} INNER JOIN ("
            "SELECT {osmrel}.{relosmid}, {osmrel}.{rellulccls}, "
            "{rules}.{ruleID}, {rules}.{ruleName}, "
            "{osmrel}.{bufferCol}, {osmrel}.{areaCol} "
            "FROM {osmrel} "
            "INNER JOIN {rules} ON {osmrel}.{_ruleID} = {rules}.{ruleID} "
        ") AS jtbl ON {osmfeat}.{osmid} = jtbl.{relosmid}"
    ).format(
        osmfeat    = DB_SCHEMA["OSM_FEATURES"]["NAME"],
        osmid      = DB_SCHEMA["OSM_FEATURES"]["OSM_ID"],
        key        = DB_SCHEMA["OSM_FEATURES"]["OSM_KEY"],
        val        = DB_SCHEMA["OSM_FEATURES"]["OSM_VALUE"],
        osmrel     = DB_SCHEMA[nomenclature]["OSM_RELATION"],
        relosmid   = DB_SCHEMA[nomenclature]["OSM_FK"],
        rellulccls = DB_SCHEMA[nomenclature]["CLS_FK"],
        _ruleID    = DB_SCHEMA[nomenclature]["RULE_FK"],
        rules      = DB_SCHEMA["RULES"]["NAME"],
        ruleID     = DB_SCHEMA["RULES"]["RULE_ID"],
        ruleName   = DB_SCHEMA["RULES"]["RULE_NAME"],
        bufferCol  = DB_SCHEMA[nomenclature]["RULES_FIELDS"]["BUFFER"],
        areaCol    = DB_SCHEMA[nomenclature]["RULES_FIELDS"]["AREA"]
    )
    
    return q_to_obj(PROCEDURE_DB, Q, db_api='sqlite')
示例#21
0
文件: tags.py 项目: jasp382/gasp
def get_not_used_tags(OSM_FILE, OUT_TBL):
    """
    Use a file OSM to detect tags not considered in the
    OSM2LULC procedure
    
    Exports one shapefile per geometry type with the features carrying
    unused tags and writes the tag list itself to OUT_TBL.
    Returns OUT_TBL.
    """

    import os
    from gasp.to import obj_to_tbl
    from gasp.gt.attr import sel_by_attr
    from gasp.sql.fm import q_to_obj
    from gasp.pyt.df.split import df_split
    from gasp.pyt.oss import fprop
    from gasp.gt.toshp.osm import osm_to_gpkg

    # Reference database with the tags OSM2LULC knows about
    OSM_TAG_MAP = {
        "DB": os.path.join(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            'osmtolulc.sqlite'),
        "OSM_FEAT": "osm_features",
        "KEY_COL": "key",
        "VALUE_COL": "value",
        "GEOM_COL": "geom"
    }

    WORKSPACE = os.path.dirname(OUT_TBL)

    # Convert the OSM file to a GeoPackage we can query
    sqdb = osm_to_gpkg(
        OSM_FILE, os.path.join(WORKSPACE,
                               fprop(OSM_FILE, 'fn') + '.gpkg'))

    # Get Features we are considering in OSM2LULC
    ourOSMFeatures = q_to_obj(
        OSM_TAG_MAP["DB"],
        ("SELECT {key} AS key_y, {value} AS value_y, {geom} AS geom_y "
         "FROM {tbl}").format(key=OSM_TAG_MAP["KEY_COL"],
                              value=OSM_TAG_MAP["VALUE_COL"],
                              geom=OSM_TAG_MAP["GEOM_COL"],
                              tbl=OSM_TAG_MAP["OSM_FEAT"]),
        db_api='sqlite')

    # Tag keys to look for in each geometry table of the GeoPackage
    # (FIX: an accidental '' adjacent-string literal next to 'landuse'
    # was removed — the resulting list is unchanged)
    TABLES_TAGS = {
        'points': ['highway', 'man_made', 'building'],
        'lines':
        ['highway', 'waterway', 'aerialway', 'barrier', 'man_made', 'railway'],
        'multipolygons': [
            'aeroway', 'amenity', 'barrier', 'building', 'craft', 'historic',
            'land_area',
            'landuse', 'leisure', 'man_made', 'military', 'natural', 'office',
            'place', 'shop', 'sport', 'tourism', 'waterway', 'power',
            'railway', 'healthcare', 'highway'
        ]
    }

    # One UNION ALL query per geometry table, listing every (key, value,
    # geometry type) present in the file
    Qs = [
        " UNION ALL ".join([(
            "SELECT '{keycol}' AS key, {keycol} AS value, "
            "'{geomtype}' AS geom FROM {tbl} WHERE "
            "{keycol} IS NOT NULL"
        ).format(
            keycol=c, geomtype='Point' if table == 'points' else 'Line' \
                if table == 'lines' else 'Polygon',
            tbl=table
        ) for c in TABLES_TAGS[table]]) for table in TABLES_TAGS
    ]

    # Get Features in File
    fileOSMFeatures = q_to_obj(sqdb,
                               ("SELECT key, value, geom FROM ({}) AS foo "
                                "GROUP BY key, value, geom").format(
                                    " UNION ALL ".join(Qs)),
                               db_api='sqlite')

    _fileOSMFeatures = fileOSMFeatures.merge(
        ourOSMFeatures,
        how='outer',
        left_on=["key", "value", "geom"],
        right_on=["key_y", "value_y", "geom_y"])

    # Select OSM Features of file without correspondence
    _fileOSMFeatures["isnew"] = _fileOSMFeatures.key_y.fillna(value='nenhum')

    # .copy() avoids pandas SettingWithCopy warnings on the edits below
    newTags = _fileOSMFeatures[_fileOSMFeatures.isnew == 'nenhum'].copy()

    # Escape single quotes so values are safe inside SQL string literals
    newTags["value"] = newTags.value.str.replace("'", "''")

    newTags["whr"] = newTags.key + "='" + newTags.value + "'"

    # Export tags not being used to new shapefile
    def to_regular_str(row):
        # Sanitization hook used on retry (currently keeps the clause)
        san_str = row.whr

        row["whr_san"] = san_str

        return row

    # Selection-query TEMPLATE, formatted once per chunk below
    # (FIX: it was pre-formatted with the whole filterDf, so the later
    # Q.format(...) calls were no-ops and every chunk exported the same
    # full selection instead of its own 500-row slice)
    Q = "SELECT * FROM {} WHERE {}"

    for t in TABLES_TAGS:
        if t == 'points':
            filterDf = newTags[newTags.geom == 'Point']

        elif t == 'lines':
            filterDf = newTags[newTags.geom == 'Line']

        elif t == 'multipolygons':
            filterDf = newTags[newTags.geom == 'Polygon']

        # Chunk the WHERE clauses to keep each query manageable
        if filterDf.shape[0] > 500:
            dfs = df_split(filterDf, 500, nrows=True)
        else:
            dfs = [filterDf]

        i = 1
        for df in dfs:
            fn = t + '.shp' if len(dfs) == 1 else '{}_{}.shp'.format(t, str(i))
            try:
                shp = sel_by_attr(sqdb,
                                  Q.format(t, df.whr.str.cat(sep=" OR ")),
                                  os.path.join(WORKSPACE, fn),
                                  api_gis='ogr')
            except Exception:
                # Best-effort retry with sanitized clauses
                __df = df.apply(lambda x: to_regular_str(x), axis=1)

                shp = sel_by_attr(sqdb,
                                  Q.format(t, __df.whr.str.cat(sep=" OR ")),
                                  os.path.join(WORKSPACE, fn))

            i += 1

    # Export OUT_TBL with tags not being used
    newTags.drop(['key_y', 'value_y', 'geom_y', 'isnew', 'whr'],
                 axis=1,
                 inplace=True)
    obj_to_tbl(newTags, OUT_TBL, sheetsName="new_tags", sanitizeUtf8=True)

    return OUT_TBL
示例#22
0
文件: mod5.py 项目: jasp382/gasp
def basic_buffer(osmdb, lineTable, dataFolder, apidb='SQLITE'):
    """
    Data from Lines table to Polygons using a basic buffering stratagie
    """

    import datetime
    from gasp.sql.fm import q_to_obj
    if apidb == 'POSTGIS':
        from gasp.gql.prox import st_buffer
    else:
        from gasp.gql.prox import splite_buffer as st_buffer
    from gasp.gt.torst import shp_to_rst
    from gasp.gt.toshp.cff import shp_to_grs

    def _now():
        # Timestamps without microseconds keep the timing log readable
        return datetime.datetime.now().replace(microsecond=0)

    # Classes that have a basic-buffer distance defined
    time_a = _now()
    lulcCls = q_to_obj(
        osmdb,
        ("SELECT basic_buffer FROM {} WHERE basic_buffer IS NOT NULL "
         "GROUP BY basic_buffer").format(lineTable),
        db_api='psql'
        if apidb == 'POSTGIS' else 'sqlite').basic_buffer.tolist()
    time_b = _now()

    timeGasto = {0: ('check_cls', time_b - time_a)}

    clsRst = {}
    tk = 1
    for cls in lulcCls:
        clsInt = int(cls)

        # Run BUFFER Tool
        time_x = _now()
        bb_file = st_buffer(
            osmdb, lineTable, "bf_basic_buffer", "geometry",
            os.path.join(dataFolder, 'bb_rule5_{}.shp'.format(str(clsInt))),
            whrClause="basic_buffer={}".format(str(clsInt)),
            outTblIsFile=True, dissolve="ALL", cols_select="basic_buffer"
        )
        time_y = _now()

        # Data TO GRASS
        grsVect = shp_to_grs(
            bb_file, "bb_{}".format(clsInt), asCMD=True, filterByReg=True
        )
        time_z = _now()

        # Data to Raster
        rstVect = shp_to_rst(
            grsVect, clsInt, None, None, "rbb_{}".format(clsInt), api="grass"
        )
        time_w = _now()

        clsRst[clsInt] = rstVect

        timeGasto[tk] = ('do_buffer_{}'.format(cls), time_y - time_x)
        timeGasto[tk + 1] = ('import_{}'.format(cls), time_z - time_y)
        timeGasto[tk + 2] = ('torst_{}'.format(cls), time_w - time_z)

        tk += 3

    return clsRst, timeGasto
示例#23
0
文件: mod5.py 项目: jasp382/gasp
def num_base_buffer(osmdb,
                    lineTbl,
                    folder,
                    cells,
                    srscode,
                    rtemplate,
                    api='SQLITE'):
    """
    Data from Lines to Polygons
    
    Buffers and rasterizes each LULC class in parallel, one thread per
    class. Returns (clsRst, timeGasto).
    """

    import datetime
    from threading import Thread
    from gasp.sql.fm import q_to_obj
    if api == 'SQLITE':
        from gasp.gql.prox import splite_buffer as st_buffer
    else:
        from gasp.gql.prox import st_buffer
    from gasp.gt.torst import shp_to_rst

    # Get LULC Classes to be selected
    time_a = datetime.datetime.now().replace(microsecond=0)
    lulcCls = q_to_obj(
        osmdb,
        ("SELECT basic_buffer FROM {} WHERE basic_buffer IS NOT NULL "
         "GROUP BY basic_buffer").format(lineTbl),
        db_api='psql' if api == 'POSTGIS' else 'sqlite').basic_buffer.tolist()
    time_b = datetime.datetime.now().replace(microsecond=0)

    timeGasto = {0: ('check_cls', time_b - time_a)}
    clsRst = {}

    def exportAndBufferB(CLS, cnt):
        # Buffer the lines of this class into a shapefile
        time_x = datetime.datetime.now().replace(microsecond=0)
        bb_file = st_buffer(
            osmdb, lineTbl, "bf_basic_buffer", "geometry",
            os.path.join(folder, 'bb_rule5_{}.shp'.format(str(int(CLS)))),
            whrClause="basic_buffer={}".format(str(int(CLS))),
            outTblIsFile=True, dissolve=None, cols_select="basic_buffer"
        )
        time_y = datetime.datetime.now().replace(microsecond=0)

        # Convert the buffered polygons to raster
        rstCls = shp_to_rst(
            bb_file, None, cells, 0,
            os.path.join(folder, 'rst_bbfr_{}.tif'.format(CLS)),
            epsg=srscode, rst_template=rtemplate, api='gdal'
        )
        time_z = datetime.datetime.now().replace(microsecond=0)

        # Each thread writes its own keys, so the shared dicts are
        # filled without clashes
        clsRst[CLS] = rstCls
        timeGasto[cnt + 1] = ('buffer_{}'.format(str(CLS)), time_y - time_x)
        timeGasto[cnt + 2] = ('torst_{}'.format(str(CLS)), time_z - time_y)

    # One worker thread per LULC class
    thrds = []
    for i in range(len(lulcCls)):
        thrds.append(Thread(
            name="r5-{}".format(lulcCls[i]),
            target=exportAndBufferB,
            args=(lulcCls[i], (i + 1) * 10)
        ))

    for t in thrds:
        t.start()
    for t in thrds:
        t.join()

    return clsRst, timeGasto
示例#24
0
def num_selection(osmdb, polyTbl, folder,
                  cellsize, srscode, rstTemplate, api='SQLITE'):
    """
    Select and Convert to Raster

    For every distinct value of the 'selection' column in polyTbl,
    export the matching geometries to a Shapefile and rasterize it,
    running one worker thread per class.

    Returns ({class: raster_path}, {step_id: (label, elapsed)}).
    """
    
    import datetime;          import os
    from threading            import Thread
    if api == 'SQLITE':
        from gasp.gt.attr     import sel_by_attr
    else:
        from gasp.gt.toshp.db import dbtbl_to_shp as sel_by_attr
    from gasp.sql.fm          import q_to_obj
    from gasp.gt.torst        import shp_to_rst
    
    # List the classes present in the data
    t_start = datetime.datetime.now().replace(microsecond=0)
    cls_values = q_to_obj(osmdb, (
        "SELECT selection FROM {} "
        "WHERE selection IS NOT NULL "
        "GROUP BY selection"
    ).format(polyTbl), db_api='psql' if api == 'POSTGIS' else 'sqlite'
    ).selection.tolist()
    t_end = datetime.datetime.now().replace(microsecond=0)
    
    timeGasto = {0 : ('check_cls', t_end - t_start)}
    
    # Select and Export, one thread per class
    clsRst = {}
    SQL_Q = "SELECT {lc} AS cls, geometry FROM {tbl} WHERE selection={lc}"
    
    def worker(CLS, cnt):
        t_a = datetime.datetime.now().replace(microsecond=0)
        if api == 'SQLITE':
            shp = sel_by_attr(
                osmdb, SQL_Q.format(lc=str(CLS), tbl=polyTbl),
                os.path.join(folder, 'sel_{}.shp'.format(str(CLS))),
                api_gis='ogr'
            )
        else:
            shp = sel_by_attr(
                osmdb, SQL_Q.format(lc=str(CLS), tbl=polyTbl), "geometry",
                os.path.join(folder, 'sel_{}.shp'.format(str(CLS))),
                api='pgsql2shp', tableIsQuery=True
            )
        t_b = datetime.datetime.now().replace(microsecond=0)
        
        # Shapefile to raster, aligned with the given template
        rst = shp_to_rst(
            shp, None, cellsize, 0,
            os.path.join(folder, 'sel_{}.tif'.format(str(CLS))),
            epsg=srscode, rst_template=rstTemplate, api='gdal'
        )
        t_c = datetime.datetime.now().replace(microsecond=0)
        
        clsRst[int(CLS)] = rst
        timeGasto[cnt + 1] = ('toshp_{}'.format(str(CLS)), t_b - t_a)
        timeGasto[cnt + 2] = ('torst_{}'.format(str(CLS)), t_c - t_b)
    
    thrds = [Thread(
        name="lll{}".format(str(cls_values[i])),
        target=worker, args=(cls_values[i], (i + 1) * 10)
    ) for i in range(len(cls_values))]
    
    for t in thrds: t.start()
    for t in thrds: t.join()
    
    return clsRst, timeGasto
示例#25
0
def run_query_for_values_in_col(db, query,
                               table_interest_col, interest_col,
                               outworkspace):
    """
    Execute a query for each value in one column
    In each iteration, the values may participate in the query.
    
    Export the several tables to excel
    
    Example:
    ID_PERCURSO | PARAGEM |    DIA     | GEOM
        0       |   255   |'2018-01-01 | xxxx
        0       |   255   |'2018-01-01 | xxxx
        0       |   254   |'2018-01-01 | xxxx
        0       |   254   |'2018-01-01 | xxxx
        0       |   255   |'2018-01-02 | xxxx
        0       |   255   |'2018-01-02 | xxxx
        0       |   254   |'2018-01-02 | xxxx
        0       |   254   |'2018-01-02 | xxxx
    
    For a query as:
    SELECT ID_PERCURSO, PARAGEM, GEOM, DIA, COUNT(PARAGEM) AS conta FROM
    table WHERE DIA={} GROUP BY PARAGEM, GEOM, DIA;
    
    This method will generate two tables:
    First table:
    ID_PERCURSO | PARAGEM |    DIA     | GEOM | conta
         0     |   255   |'2018-01-01 | xxxx |   2
         0     |   254   |'2018-01-01 | xxxx |   2
    
    Second table:
    ID_PERCURSO | PARAGEM |    DIA     | GEOM | conta
          0     |   255   |'2018-01-02 | xxxx |   2
          0     |   254   |'2018-01-02 | xxxx |   2
    
    {} will be replaced for every value in the interest_column that will
    be iterated one by one
    """
    
    import os
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i  import cols_type
    from gasp.to     import obj_to_tbl
    
    # Column types decide whether values must be quoted in the query
    fields_types = cols_type(db, table_interest_col)
    
    # Get unique values
    VALUES = q_to_obj(db,
        "SELECT {col} FROM {t} GROUP BY {col}".format(
            col=interest_col, t=table_interest_col
        ), db_api='psql'
    )[interest_col].tolist()
    
    # Apply query for every value in VALUES
    # Write data in excel
    for value in VALUES:
        # FIX: Series.tolist() yields scalar values, not rows; the
        # original code indexed value[0], which raises TypeError for
        # numeric values and silently truncates strings to one char.
        val_sql = str(value) if fields_types[interest_col] != str else \
            "'{}'".format(str(value))
        
        data = q_to_obj(db, query.format(val_sql), db_api='psql')
        
        obj_to_tbl(data, os.path.join(outworkspace, '{}_{}.xlsx'.format(
            table_interest_col, str(value)
        )))
示例#26
0
def rst_pnt_to_build(osmdb, pntTable, polyTable, api_db='SQLITE'):
    """
    Replace buildings with tag yes using the info in the Points Layer
    
    Only used for URBAN ATLAS and CORINE LAND COVER

    Workflow (all geometry work happens inside the osmdb database and a
    previously started GRASS GIS session):
    1. Polygons with a building tag that contain a tagged building POINT
       inherit the point's class ("new_buildings").
    2. Polygons with a building tag but NO point inside keep the generic
       class 11 ("yes_builds").
    3. Both sets are exported to GRASS and rasterized, one raster per class.

    Returns (resLayers, timeGasto) where resLayers maps class -> list of
    GRASS raster names and timeGasto maps step index -> (label, elapsed).
    """

    import datetime as dt
    from gasp.sql.i import row_num as cnt_row
    from gasp.sql.fm import q_to_obj
    from gasp.gt.toshp.db import dbtbl_to_shp as db_to_shp
    from gasp.gql.ovly import feat_within, feat_not_within
    from gasp.gt.torst import shp_to_rst

    # Points inside building polygons give those polygons their class
    time_a = dt.datetime.now().replace(microsecond=0)
    new_build = feat_within(
        osmdb,
        ("(SELECT buildings AS pnt_build, geometry AS pnt_geom "
         "FROM {} WHERE buildings IS NOT NULL)").format(pntTable),
        "pnt_geom",
        ("(SELECT buildings AS poly_build, geometry AS poly_geom "
         "FROM {} WHERE buildings IS NOT NULL)").format(polyTable),
        "poly_geom",
        "new_buildings",
        inTblCols="pnt_build AS cls",
        withinCols="poly_geom AS geometry",
        outTblIsFile=None,
        apiToUse="OGR_SPATIALITE" if api_db != "POSTGIS" else api_db,
        geom_col="geometry")
    time_b = dt.datetime.now().replace(microsecond=0)

    # Building polygons with no point inside fall back to class 11
    yes_build = feat_not_within(
        osmdb,
        ("(SELECT buildings AS poly_build, geometry AS poly_geom "
         "FROM {} WHERE buildings IS NOT NULL)").format(polyTable),
        "poly_geom",
        ("(SELECT buildings AS pnt_build, geometry AS pnt_geom "
         "FROM {} WHERE buildings IS NOT NULL)").format(pntTable),
        "pnt_geom",
        "yes_builds",
        inTblCols="poly_geom AS geometry, 11 AS cls",
        outTblIsFile=None,
        apiToUse="OGR_SPATIALITE" if api_db != "POSTGIS" else api_db,
        geom_col="geometry")
    time_c = dt.datetime.now().replace(microsecond=0)

    resLayers = {}
    # Only rasterize the class-11 table if it actually has rows
    N11 = cnt_row(osmdb,
                  yes_build,
                  api='psql' if api_db == 'POSTGIS' else 'sqlite')
    time_d = dt.datetime.now().replace(microsecond=0)

    if N11:
        # Data to GRASS GIS
        grsBuild11 = db_to_shp(
            osmdb,
            yes_build,
            "geometry",
            "yes_builds",
            notTable=True,
            filterByReg=True,
            inDB='psql' if api_db == 'POSTGIS' else 'sqlite',
            outShpIsGRASS=True)
        time_f = dt.datetime.now().replace(microsecond=0)

        # To raster
        rstBuild11 = shp_to_rst(grsBuild11,
                                11,
                                None,
                                None,
                                "rst_builds11",
                                api="grass")
        time_g = dt.datetime.now().replace(microsecond=0)

        resLayers[11] = [rstBuild11]

    else:
        # Timestamps stay None so the timing report below skips these steps
        time_f = None
        time_g = None

    # Distinct classes inherited from the points layer
    lulcCls = q_to_obj(
        osmdb,
        "SELECT cls FROM {} GROUP BY cls".format(new_build),
        db_api='psql' if api_db == 'POSTGIS' else 'sqlite').cls.tolist()

    timeGasto = {
        0: ('intersect', time_b - time_a),
        1: ('disjoint', time_c - time_b),
        2: ('count_b11', time_d - time_c),
        3: None if not time_f else ('import_b11', time_f - time_d),
        4: None if not time_g else ('torst_b11', time_g - time_f),
    }
    # Import + rasterize each inherited class; two timing entries per class
    tk = 5
    for cls in lulcCls:
        time_x = dt.datetime.now().replace(microsecond=0)
        shp = db_to_shp(osmdb,
                        new_build,
                        "geometry",
                        "nbuild_{}".format(str(cls)),
                        "cls = {}".format(cls),
                        notTable=True,
                        filterByReg=True,
                        outShpIsGRASS=True)
        time_y = dt.datetime.now().replace(microsecond=0)

        rstb = shp_to_rst(shp,
                          int(cls),
                          None,
                          None,
                          "rst_nbuild_{}".format(str(cls)),
                          api="grass")
        time_z = dt.datetime.now().replace(microsecond=0)

        # Class 11 may already have the "yes_builds" raster from above
        if int(cls) == 11 and int(cls) in resLayers:
            resLayers[int(cls)].append(rstb)

        else:
            resLayers[int(cls)] = [rstb]

        timeGasto[tk] = ('import_bn{}'.format(cls), time_y - time_x)
        timeGasto[tk + 1] = ('torst_bn{}'.format(cls), time_z - time_y)

        tk += 2

    return resLayers, timeGasto
示例#27
0
文件: tomtx.py 项目: jasp382/gasp
def tbl_to_area_mtx(db, tbl, cola, colb, result):
    """
    Table to Matrix
    
    Table as:
        FID | col_a | col_b | geom
    0 |  1  |   A   |   A   | ....
    0 |  2  |   A   |   B   | ....
    0 |  3  |   A   |   A   | ....
    0 |  4  |   A   |   C   | ....
    0 |  5  |   A   |   B   | ....
    0 |  6  |   B   |   A   | ....
    0 |  7  |   B   |   A   | ....
    0 |  8  |   B   |   B   | ....
    0 |  9  |   B   |   B   | ....
    0 | 10  |   C   |   A   | ....
    0 | 11  |   C   |   B   | ....
    0 | 11  |   C   |   D   | ....
    
    To:
    classe | A | B | C | D
       A   |   |   |   | 
       B   |   |   |   |
       C   |   |   |   |
       D   |   |   |   |
    
    cola = rows
    colb = cols

    Cell values are summed geometry areas (km2, rounded to 3 decimals).
    Implemented with PostgreSQL's crosstab(), so the target database
    needs the tablefunc extension.  Returns the name of the new table.
    """

    from gasp.sql.to import q_to_ntbl
    from gasp.sql.fm import q_to_obj

    # Distinct class labels across BOTH columns; these become the
    # output columns of crosstab() and must be listed in sorted order.
    ref_val = q_to_obj(db,
                       ("SELECT fcol FROM ("
                        "SELECT CAST({map1_cls} AS text) AS fcol FROM {tbl} "
                        "GROUP BY {map1_cls} "
                        "UNION ALL "
                        "SELECT CAST({map2_cls} AS text) AS fcol FROM {tbl} "
                        "GROUP BY {map2_cls}"
                        ") AS foo GROUP BY fcol ORDER BY fcol").format(
                            tbl=tbl,
                            map1_cls=cola,
                            map2_cls=colb,
                        ),
                       db_api='psql').fcol.tolist()

    # The inner query full-joins the area sums against the cartesian
    # product of labels so every (row, col) pair exists, with 0 area
    # where the combination is absent — crosstab() needs dense input.
    mtx_tbl = q_to_ntbl(db,
                        result,
                        ("SELECT * FROM crosstab('"
                         "SELECT CASE "
                         "WHEN foo.{map1_cls} IS NOT NULL "
                         "THEN foo.{map1_cls} ELSE jtbl.flyr "
                         "END AS lulc1_cls, CASE "
                         "WHEN foo.{map2_cls} IS NOT NULL "
                         "THEN foo.{map2_cls} ELSE jtbl.slyr "
                         "END AS lulc2_cls, CASE "
                         "WHEN foo.garea IS NOT NULL "
                         "THEN round(CAST(foo.garea / 1000000 AS numeric)"
                         ", 3) ELSE 0 "
                         "END AS garea FROM ("
                         "SELECT CAST({map1_cls} AS text) AS {map1_cls}, "
                         "CAST({map2_cls} AS text) AS {map2_cls}, "
                         "SUM(ST_Area(geom)) AS garea "
                         "FROM {tbl} GROUP BY {map1_cls}, {map2_cls}"
                         ") AS foo FULL JOIN ("
                         "SELECT f.flyr, s.slyr FROM ("
                         "SELECT CAST({map1_cls} AS text) AS flyr "
                         "FROM {tbl} GROUP BY {map1_cls}"
                         ") AS f, ("
                         "SELECT CAST({map2_cls} AS text) AS slyr "
                         "FROM {tbl} GROUP BY {map2_cls}"
                         ") AS s"
                         ") AS jtbl "
                         "ON foo.{map1_cls} = jtbl.flyr AND "
                         "foo.{map2_cls} = jtbl.slyr "
                         "ORDER BY 1,2"
                         "') AS ct("
                         "lulc_cls text, {crossCols}"
                         ")").format(crossCols=", ".join(
                             ["cls_{} numeric".format(c) for c in ref_val]),
                                     tbl=tbl,
                                     map1_cls=cola,
                                     map2_cls=colb),
                        api='psql')

    return mtx_tbl
示例#28
0
文件: db.py 项目: jasp382/gasp
def dbtbl_to_shp(db, tbl, geom_col, outShp, where=None, inDB='psql',
                 notTable=None, filterByReg=None, outShpIsGRASS=None,
                 tableIsQuery=None, api='psql', epsg=None):
    """
    Database Table to Feature Class file
    
    inDB Options:
    * psql
    * sqlite
    
    api Options:
    * psql
    * sqlite
    * pgsql2shp
    
    if outShpIsGRASS if true, the method assumes that outShp is
    a GRASS Vector. That implies that a GRASS Session was been
    started already. 

    Returns outShp (the GRASS vector name or the written file path).
    """

    from gasp.gt.toshp import df_to_shp
    
    if outShpIsGRASS:
        # Import directly into the running GRASS session via v.in.ogr
        from gasp import exec_cmd
        from gasp.cons.psql import con_psql

        db_con = con_psql()
        
        whr = "" if not where else " where=\"{}\"".format(where)
        
        # -t skips the attribute table; -r restricts to the current region
        cmd_str = (
            "v.in.ogr input=\"PG:host={} dbname={} user={} password={} "
            "port={}\" output={} layer={} geometry={}{}{}{} -o --overwrite --quiet"
        ).format(
            db_con["HOST"], db, db_con["USER"], db_con["PASSWORD"],
            db_con["PORT"], outShp, tbl, geom_col, whr,
            " -t" if notTable else "", " -r" if filterByReg else ""
        ) if inDB == 'psql' else (
            "v.in.ogr -o input={} layer={} output={}{}{}{}"
        ).format(db, tbl, outShp, whr,
            " -t" if notTable else "", " -r" if filterByReg else ""
        ) if inDB == 'sqlite' else None
        
        rcmd = exec_cmd(cmd_str)
    
    else:
        if api == 'pgsql2shp':
            # Shell out to PostGIS's pgsql2shp utility
            from gasp import exec_cmd
            from gasp.cons.psql import con_psql

            db_con = con_psql()
            
            outcmd = exec_cmd((
                'pgsql2shp -f {out} -h {hst} -u {usr} -p {pt} -P {pas}{geom} '
                '{bd} {t}'
            ).format(
                hst=db_con['HOST'], usr=db_con["USER"], pt=db_con["PORT"],
                pas=db_con['PASSWORD'], bd=db, out=outShp,
                t=tbl if not tableIsQuery else '"{}"'.format(tbl),
                geom="" if not geom_col else " -g {}".format(geom_col)
            ))
        
        elif api == 'psql' or api == 'sqlite':
            # Load into a (Geo)DataFrame, then write the Shapefile
            from gasp.sql.fm import q_to_obj
            
            q = "SELECT * FROM {}".format(tbl) if not tableIsQuery else tbl
            
            df = q_to_obj(db, q, db_api=api, geomCol=geom_col, epsg=epsg)
            
            outsh = df_to_shp(df, outShp)
        
        else:
            raise ValueError((
                'api value must be \'psql\', \'sqlite\' or \'pgsql2shp\''))
    
    return outShp
示例#29
0
文件: time.py 项目: jasp382/gasp
def ID_rows_with_temporal_proximity_by_entities(db, table, entity_field,
                                                day_field, hour_field,
                                                hour_decimal_field,
                                                time_tolerance, outXlsPath):
    """
    Retrieve rows from one pgtable with some temporal proximity
    
    Table structure should be
    entity |     day    | hour | hour_decimal
      0    | 2018-01-02 |  5   |   5,10
      0    | 2018-01-03 |  4   |   4,15
      0    | 2018-01-02 |  5   |   5,12
      0    | 2018-01-02 |  5   |   5,8
      1    | 2018-01-02 |  4   |   4,10
      1    | 2018-01-02 |  5   |   5,12
      1    | 2018-01-02 |  4   |   4,20
      1    | 2018-01-02 |  4   |   4,12
      1    | 2018-01-02 |  4   |   4,6
    
    For a time_tolerance of 5 minutes, the output table will have
    the rows with a temporal difference inside/bellow that time tolerance
    
    entity_field could be more than one field
    
    This method only identifies if one entity, for one day, has rows 
    very close of each others, in terms of time.
    
    Not a good strategy for large tables. For large tables, SQL based methods
    are needed
    """

    import pandas
    from gasp.pyt import obj_to_lst
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i import cols_type
    from gasp.to import obj_to_tbl

    entity_field = obj_to_lst(entity_field)
    COLS = entity_field + [day_field, hour_field]
    # Column python-types decide whether WHERE values need quoting
    COLS_TYPE = cols_type(db, table)

    # TIME TOLERANCE IN HOURS
    TIME_TOLERANCE = time_tolerance / 60.0

    def thereIsRowsSameTimeInt(row):
        # Build the WHERE clause matching this entity/day/hour group
        whr = []
        for c in COLS:
            if COLS_TYPE[c] == str:
                whr.append("{}='{}'".format(c, row[c]))
            else:
                whr.append("{}={}".format(c, row[c]))

        hourRows = q_to_obj(db,
                            "SELECT {} FROM {} WHERE {}".format(
                                hour_decimal_field, table, " AND ".join(whr)),
                            db_api='psql')[hour_decimal_field].tolist()

        # FIX: Series.tolist() yields scalar hour values — the original
        # indexed hourRows[i][0], which raises TypeError on floats.
        # Also FIX: 'dif' was unbound (NameError) when fewer than two
        # rows matched; use an explicit flag instead.
        has_close_pair = 0
        for i in range(len(hourRows)):
            for e in range(i + 1, len(hourRows)):
                if abs(hourRows[i] - hourRows[e]) < TIME_TOLERANCE:
                    has_close_pair = 1
                    break

            if has_close_pair:
                break

        row['time_difference'] = has_close_pair

        return row

    # Count entity occourrences for one day and hour
    countsByEntityTime = q_to_obj(
        db,
        ("SELECT {scols}, conta FROM "
         "(SELECT {scols}, COUNT({ent}) AS conta FROM {tbl} "
         "GROUP BY {scols}) AS foo WHERE conta > 1").format(
             scols=', '.join(COLS), ent=entity_field[0], tbl=table),
        db_api='psql')

    # For each row in the last count, When count is > 1
    # Check time difference between rows for one day and hour
    countsByEntityTime = countsByEntityTime.apply(
        lambda x: thereIsRowsSameTimeInt(x), axis=1)

    obj_to_tbl(countsByEntityTime, outXlsPath)

    return outXlsPath
示例#30
0
def check_shape_diff(SHAPES_TO_COMPARE, OUT_FOLDER, REPORT, DB,
                     GRASS_REGION_TEMPLATE):
    """
    Script to check differences between pairs of Feature Classes
    
    Suponha que temos diversas Feature Classes (FC) e que cada uma delas
    possui um determinado atributo; imagine tambem que,
    considerando todos os pares possiveis entre estas FC,
    se pretende comparar as diferencas na distribuicao dos valores
    desse atributo para cada par.

    (Translation: given several Feature Classes, each with an attribute
    of interest, compare — for every possible pair of FCs — the
    differences in the distribution of that attribute's values.)

    SHAPES_TO_COMPARE maps file path -> attribute column name; raster
    inputs are vectorized first.  Results (agreement tables and
    confusion matrices) are written to the REPORT spreadsheet.
    
    * Dependencias:
    - GRASS;
    - PostgreSQL;
    - PostGIS.
    """

    import datetime
    import os
    import pandas
    from gasp.sql.fm import q_to_obj
    from gasp.to import db_to_tbl
    from gasp.sql.to import df_to_db
    from gasp.gt.toshp.cff import shp_to_shp
    from gasp.gt.toshp.db import dbtbl_to_shp
    from gasp.gt.toshp.rst import rst_to_polyg
    from gasp.gql.to import shp_to_psql
    from gasp.gql.tomtx import tbl_to_area_mtx
    from gasp.gt.prop.ff import check_isRaster
    from gasp.pyt.oss import fprop
    from gasp.sql.db import create_db
    from gasp.sql.tbl import tbls_to_tbl
    from gasp.sql.to import q_to_ntbl
    from gasp.gql.cln import fix_geom
    # NOTE(review): db_to_tbl is imported twice (also above) — harmless
    # duplicate; fix_geom and shp_to_shp appear unused in this function.
    from gasp.to import db_to_tbl

    # Check if folder exists, if not create it
    if not os.path.exists(OUT_FOLDER):
        from gasp.pyt.oss import mkdir
        mkdir(OUT_FOLDER, overwrite=None)
    else:
        raise ValueError('{} already exists!'.format(OUT_FOLDER))

    from gasp.gt.wenv.grs import run_grass

    # Start a GRASS session using the template raster/shape as region
    gbase = run_grass(OUT_FOLDER,
                      grassBIN='grass78',
                      location='shpdif',
                      srs=GRASS_REGION_TEMPLATE)

    import grass.script as grass
    import grass.script.setup as gsetup

    gsetup.init(gbase, OUT_FOLDER, 'shpdif', 'PERMANENT')

    from gasp.gt.toshp.cff import shp_to_grs, grs_to_shp
    from gasp.gt.torst import rst_to_grs
    from gasp.gt.tbl.fld import rn_cols

    # Convert to SHAPE if file is Raster
    # Normalize every input so its comparison column is named lulc_<i>
    i = 0
    _SHP_TO_COMPARE = {}
    for s in SHAPES_TO_COMPARE:
        isRaster = check_isRaster(s)

        if isRaster:
            # To GRASS
            rstName = fprop(s, 'fn')
            inRst = rst_to_grs(s, "rst_" + rstName, as_cmd=True)
            # To Vector
            d = rst_to_polyg(inRst,
                             rstName,
                             rstColumn="lulc_{}".format(i),
                             gisApi="grass")

            # Export Shapefile
            shp = grs_to_shp(d, os.path.join(OUT_FOLDER, d + '.shp'), "area")

            _SHP_TO_COMPARE[shp] = "lulc_{}".format(i)

        else:
            # To GRASS
            grsV = shp_to_grs(s, fprop(s, 'fn'), asCMD=True)

            # Change name of column with comparing value
            ncol = "lulc_{}".format(str(i))
            rn_cols(grsV, {SHAPES_TO_COMPARE[s]: "lulc_{}".format(str(i))},
                    api="grass")

            # Export
            shp = grs_to_shp(grsV, os.path.join(OUT_FOLDER, grsV + '_rn.shp'),
                             "area")

            _SHP_TO_COMPARE[shp] = "lulc_{}".format(str(i))

        i += 1

    SHAPES_TO_COMPARE = _SHP_TO_COMPARE
    __SHAPES_TO_COMPARE = SHAPES_TO_COMPARE

    # Create database
    create_db(DB, api='psql')
    """ Union SHAPEs """

    UNION_SHAPE = {}
    FIX_GEOM = {}

    # Union every unordered pair of shapes
    SHPS = list(__SHAPES_TO_COMPARE.keys())
    for i in range(len(SHPS)):
        for e in range(i + 1, len(SHPS)):
            # Optimized Union
            # NOTE(review): optimized_union_anls is not imported inside
            # this function — presumably a module-level import in the
            # original file; verify before running standalone.
            print("Union between {} and {}".format(SHPS[i], SHPS[e]))
            time_a = datetime.datetime.now().replace(microsecond=0)
            __unShp = optimized_union_anls(
                SHPS[i],
                SHPS[e],
                os.path.join(OUT_FOLDER, "un_{}_{}.shp".format(i, e)),
                GRASS_REGION_TEMPLATE,
                os.path.join(OUT_FOLDER, "work_{}_{}".format(i, e)),
                multiProcess=True)
            time_b = datetime.datetime.now().replace(microsecond=0)
            print(time_b - time_a)

            # Rename cols
            # Union prefixes columns with a_/b_; restore original names
            unShp = rn_cols(
                __unShp, {
                    "a_" + __SHAPES_TO_COMPARE[SHPS[i]]:
                    __SHAPES_TO_COMPARE[SHPS[i]],
                    "b_" + __SHAPES_TO_COMPARE[SHPS[e]]:
                    __SHAPES_TO_COMPARE[SHPS[e]]
                })

            UNION_SHAPE[(SHPS[i], SHPS[e])] = unShp

    # Send data to postgresql
    SYNTH_TBL = {}

    for uShp in UNION_SHAPE:
        # Send data to PostgreSQL
        union_tbl = shp_to_psql(DB, UNION_SHAPE[uShp], api='shp2pgsql')

        # Produce table with % of area equal in both maps
        # (agreement area in km2, % of the smaller map's total area)
        areaMapTbl = q_to_ntbl(
            DB,
            "{}_syn".format(union_tbl),
            ("SELECT CAST('{lulc_1}' AS text) AS lulc_1, "
             "CAST('{lulc_2}' AS text) AS lulc_2, "
             "round("
             "CAST(SUM(g_area) / 1000000 AS numeric), 4"
             ") AS agree_area, round("
             "CAST((SUM(g_area) / MIN(total_area)) * 100 AS numeric), 4"
             ") AS agree_percentage, "
             "round("
             "CAST(MIN(total_area) / 1000000 AS numeric), 4"
             ") AS total_area FROM ("
             "SELECT {map1_cls}, {map2_cls}, ST_Area(geom) AS g_area, "
             "CASE "
             "WHEN {map1_cls} = {map2_cls} "
             "THEN 1 ELSE 0 "
             "END AS isthesame, total_area FROM {tbl}, ("
             "SELECT SUM(ST_Area(geom)) AS total_area FROM {tbl}"
             ") AS foo2"
             ") AS foo WHERE isthesame = 1 "
             "GROUP BY isthesame").format(
                 lulc_1=fprop(uShp[0], 'fn'),
                 lulc_2=fprop(uShp[1], 'fn'),
                 map1_cls=__SHAPES_TO_COMPARE[uShp[0]],
                 map2_cls=__SHAPES_TO_COMPARE[uShp[1]],
                 tbl=union_tbl),
            api='psql')

        # Produce confusion matrix for the pair in comparison
        matrixTbl = tbl_to_area_mtx(DB, union_tbl,
                                    __SHAPES_TO_COMPARE[uShp[0]],
                                    __SHAPES_TO_COMPARE[uShp[1]],
                                    union_tbl + '_mtx')

        SYNTH_TBL[uShp] = {"TOTAL": areaMapTbl, "MATRIX": matrixTbl}

    # UNION ALL TOTAL TABLES
    total_table = tbls_to_tbl(DB, [SYNTH_TBL[k]["TOTAL"] for k in SYNTH_TBL],
                              'total_table')

    # Create table with % of agreement between each pair of maps
    mapsNames = q_to_obj(
        DB,
        ("SELECT lulc FROM ("
         "SELECT lulc_1 AS lulc FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc FROM {tbl} GROUP BY lulc_2"
         ") AS lu GROUP BY lulc ORDER BY lulc").format(tbl=total_table),
        db_api='psql').lulc.tolist()

    FLDS_TO_PIVOT = ["agree_percentage", "total_area"]

    # crosstab() query pivoting pairwise values into a map-by-map matrix;
    # the symmetric UNION makes the matrix mirror across the diagonal
    Q = ("SELECT * FROM crosstab('"
         "SELECT CASE "
         "WHEN foo.lulc_1 IS NOT NULL THEN foo.lulc_1 ELSE jtbl.tmp1 "
         "END AS lulc_1, CASE "
         "WHEN foo.lulc_2 IS NOT NULL THEN foo.lulc_2 ELSE jtbl.tmp2 "
         "END AS lulc_2, CASE "
         "WHEN foo.{valCol} IS NOT NULL THEN foo.{valCol} ELSE 0 "
         "END AS agree_percentage FROM ("
         "SELECT lulc_1, lulc_2, {valCol} FROM {tbl} UNION ALL "
         "SELECT lulc_1, lulc_2, {valCol} FROM ("
         "SELECT lulc_1 AS lulc_2, lulc_2 AS lulc_1, {valCol} "
         "FROM {tbl}"
         ") AS tst"
         ") AS foo FULL JOIN ("
         "SELECT lulc_1 AS tmp1, lulc_2 AS tmp2 FROM ("
         "SELECT lulc_1 AS lulc_1 FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc_1 FROM {tbl} GROUP BY lulc_2"
         ") AS tst_1, ("
         "SELECT lulc_1 AS lulc_2 FROM {tbl} GROUP BY lulc_1 "
         "UNION ALL "
         "SELECT lulc_2 AS lulc_2 FROM {tbl} GROUP BY lulc_2"
         ") AS tst_2 WHERE lulc_1 = lulc_2 GROUP BY lulc_1, lulc_2"
         ") AS jtbl ON foo.lulc_1 = jtbl.tmp1 AND foo.lulc_2 = jtbl.tmp2 "
         "ORDER BY lulc_1, lulc_2"
         "') AS ct("
         "lulc_map text, {crossCols}"
         ")")

    # First pivot field -> agreement_table; second -> area_table
    TOTAL_AGREE_TABLE = None
    TOTAL_AREA_TABLE = None
    for f in FLDS_TO_PIVOT:
        if not TOTAL_AGREE_TABLE:
            TOTAL_AGREE_TABLE = q_to_ntbl(
                DB,
                "agreement_table",
                Q.format(tbl=total_table,
                         valCol=f,
                         crossCols=", ".join([
                             "{} numeric".format(map_) for map_ in mapsNames
                         ])),
                api='psql')

        else:
            TOTAL_AREA_TABLE = q_to_ntbl(DB,
                                         "area_table",
                                         Q.format(tbl=total_table,
                                                  valCol=f,
                                                  crossCols=", ".join([
                                                      "{} numeric".format(map_)
                                                      for map_ in mapsNames
                                                  ])),
                                         api='psql')

    # Union Mapping
    # Record which union shapefile corresponds to each input pair
    UNION_MAPPING = pandas.DataFrame(
        [[k[0], k[1], fprop(UNION_SHAPE[k], 'fn')] for k in UNION_SHAPE],
        columns=['shp_a', 'shp_b', 'union_shp'])

    UNION_MAPPING = df_to_db(DB, UNION_MAPPING, 'union_map', api='psql')

    # Export Results
    TABLES = [UNION_MAPPING, TOTAL_AGREE_TABLE, TOTAL_AREA_TABLE
              ] + [SYNTH_TBL[x]["MATRIX"] for x in SYNTH_TBL]

    # Sheet names are truncated to 15 chars per file so the combined
    # name fits Excel's 31-char sheet-name limit
    SHEETS = ["union_map", "agreement_percentage", "area_with_data_km"] + [
        "{}_{}".format(fprop(x[0], 'fn')[:15],
                       fprop(x[1], 'fn')[:15]) for x in SYNTH_TBL
    ]

    db_to_tbl(DB, ["SELECT * FROM {}".format(x) for x in TABLES],
              REPORT,
              sheetsNames=SHEETS,
              dbAPI='psql')

    return REPORT