Example #1
def dump(overlaps, aggregate):
    """Dump output designatedlands table to file
    """
    if aggregate:
        if overlaps:
            util.log('ignoring --overlaps flag')
        geoutil.dump_aggregate(config, 'designatedlands_agg')
    else:
        if overlaps:
            config['out_table'] = config['out_table'] + '_overlaps'
        db = pgdata.connect(config["db_url"], schema="public")
        util.log('Dumping %s to %s' %
                 (config['out_table'], config['out_file']))
        columns = [
            c for c in db[config['out_table']].columns
            if c != 'geom' and 'prelim' not in c
        ]
        ogr_sql = """SELECT {cols},
                    st_collectionextract(st_safe_repair(st_snaptogrid(geom, .001)), 3) as geom
                    FROM {t}
                    WHERE designation IS NOT NULL
                """.format(cols=",".join(columns), t=config['out_table'])
        util.log(ogr_sql)
        db = pgdata.connect(config["db_url"])
        db.pg2ogr(
            ogr_sql,
            config['out_format'],
            config['out_file'],
            config['out_table'],
            geom_type="MULTIPOLYGON",
        )
Example #2
def overlay(in_file, in_layer, dump_file, new_layer_name):
    """Intersect layer with designatedlands
    """
    # load in_file to postgres
    db = pgdata.connect(config['db_url'], schema="public")
    if not in_layer:
        in_layer = fiona.listlayers(in_file)[0]
    if not new_layer_name:
        new_layer_name = in_layer[:63]  # Maximum table name length is 63
    out_layer = new_layer_name[:50] + "_overlay"
    db.ogr2pg(in_file, in_layer=in_layer, out_layer=new_layer_name)
    # pull distinct tiles iterable into a list
    tiles = [t for t in db["tiles"].distinct('map_tile')]
    # uncomment and adjust for debugging a specific tile
    # tiles = [t for t in tiles if t[:4] == '092K']
    util.log("Intersecting %s with %s" % (config['out_table'], new_layer_name))
    geoutil.intersect(
        db,
        config['out_table'],
        new_layer_name,
        out_layer,
        config['n_processes'],
        tiles,
    )
    # dump result to file
    if dump_file:
        util.log("Dumping intersect to file %s " % config['out_file'])
        dump(out_layer, config['out_file'], config['out_format'])
Example #3
def create_temp_table():
    # create new table
    db = pgdata.connect()
    db.execute("DROP TABLE IF EXISTS whse_basemapping.temp_upstream_wb_area_ha")
    db.execute(
        """
        CREATE TABLE whse_basemapping.temp_upstream_wb_area_ha
        (linear_feature_id bigint primary key,
         upstream_lake_ha double precision,
         upstream_reservoir_ha double precision,
         upstream_wetland_ha double precision)
        """
    )
    groups = [
        r[0]
        for r in db.query(
            """
            SELECT watershed_group_code
            FROM whse_basemapping.fwa_watershed_groups_poly
            WHERE watershed_group_code in ('BULK','MORR','HORS','LNIC','ELKR')
            ORDER BY watershed_group_code
            """
        )
    ]
    # use a raw psycopg2 connection and commit per group so the queries are not
    # all wrapped in a single transaction - postgres can then clear its on-disk
    # temp files between groups and we won't run out of disk space
    conn = psycopg2.connect("")

    # insert per watershed group
    for wsg in groups:
        print(wsg)
        cur = conn.cursor()
        cur.execute(db.queries["temp_upstream_wb_area_ha"], (wsg,))
        conn.commit()
        cur.close()
Example #4
def segment_streams(stream_table, point_table):
    """
    Break streams at points.

    Break lines in stream_table at each location specified in point_table
    Points are defined as blue_line_key / route measure (not geom).
    Each table must have the standard FWA columns.
    """
    db = pgdata.connect()
    stream_schema, stream_table = db.parse_table_name(stream_table)
    point_schema, point_table = db.parse_table_name(point_table)
    groups = [
        g[0] for g in db.query(
            "SELECT DISTINCT watershed_group_code FROM bcfishpass.streams")
    ]
    query = sql.SQL(read_file("sql/00_segment_streams.sql")).format(
        stream_schema=sql.Identifier(stream_schema),
        stream_table=sql.Identifier(stream_table),
        point_schema=sql.Identifier(point_schema),
        point_table=sql.Identifier(point_table),
    )
    func = partial(execute_parallel, query)
    n_processes = multiprocessing.cpu_count() - 1
    pool = multiprocessing.Pool(processes=n_processes)
    pool.map(func, groups)
    pool.close()
    pool.join()
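A minimal usage sketch (the crossings table name is an assumption; both tables must carry the standard FWA columns noted in the docstring):

# hypothetical call - break bcfishpass.streams at each blue_line_key /
# route measure defined in an (assumed) bcfishpass.crossings table
segment_streams("bcfishpass.streams", "bcfishpass.crossings")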
Example #5
def apply_updates():
    """apply updates rather than re-loading to another temp table"""
    db = pgdata.connect()
    # add new columns if not present
    db.execute("ALTER TABLE whse_basemapping.fwa_stream_networks_sp ADD COLUMN IF NOT EXISTS upstream_lake_ha double precision")
    db.execute("ALTER TABLE whse_basemapping.fwa_stream_networks_sp ADD COLUMN IF NOT EXISTS upstream_reservoir_ha double precision")
    db.execute("ALTER TABLE whse_basemapping.fwa_stream_networks_sp ADD COLUMN IF NOT EXISTS upstream_wetland_ha double precision")
    groups = [
        r[0]
        for r in db.query(
            """
            SELECT watershed_group_code
            FROM whse_basemapping.fwa_watershed_groups_poly
            WHERE watershed_group_code in ('BULK','MORR','HORS','LNIC','ELKR')
            ORDER BY watershed_group_code
            """
        )
    ]
    conn = psycopg2.connect("")
    for wsg in groups:
        print(wsg)
        cur = conn.cursor()
        cur.execute(db.queries["update_upstream_wb_area_ha"], (wsg,))
        conn.commit()
        cur.close()
Example #6
def test_create_table():
    db = connect(URL, schema="pgdata")
    columns = [Column('user_id', Integer, primary_key=True),
               Column('user_name', UnicodeText, nullable=False),
               Column('email_address', UnicodeText),
               Column('password', UnicodeText, nullable=False)]
    employees = db.create_table("employees", columns)
    assert employees.table.exists()
Example #7
def add_downstream_ids(
    table_a, id_a, table_b, id_b, downstream_ids_col, include_equivalent_measure
):
    """note downstream ids
    """
    db = pgdata.connect()
    schema_a, table_a = db.parse_table_name(table_a)
    schema_b, table_b = db.parse_table_name(table_b)
    # ensure that any existing values get removed
    db.execute(
        f"ALTER TABLE {schema_a}.{table_a} DROP COLUMN IF EXISTS {downstream_ids_col}"
    )
    temp_table = table_a + "_tmp"
    db[f"{schema_a}.{temp_table}"].drop()
    db.execute(f"CREATE TABLE {schema_a}.{temp_table} (LIKE {schema_a}.{table_a})")
    db.execute(
        f"ALTER TABLE {schema_a}.{temp_table} ADD COLUMN {downstream_ids_col} integer[]"
    )
    groups = sorted(
        [
            g[0]
            for g in db.query(
                f"SELECT DISTINCT watershed_group_code from {schema_a}.{table_a}"
            )
        ]
    )
    # todo - is this really the best way to specify which query to use?
    if include_equivalent_measure:
        q = "sql/00_add_downstream_and_equivalent_ids.sql"
    else:
        q = "sql/00_add_downstream_ids.sql"
    query = sql.SQL(read_file(q)).format(
        schema_a=sql.Identifier(schema_a),
        schema_b=sql.Identifier(schema_b),
        temp_table=sql.Identifier(temp_table),
        table_a=sql.Identifier(table_a),
        table_b=sql.Identifier(table_b),
        id_a=sql.Identifier(id_a),
        id_b=sql.Identifier(id_b),
        dnstr_ids_col=sql.Identifier(downstream_ids_col),
    )
    # run each group in parallel
    func = partial(execute_parallel, query)
    n_processes = multiprocessing.cpu_count() - 1
    pool = multiprocessing.Pool(processes=n_processes)
    pool.map(func, groups)
    pool.close()
    pool.join()
    # drop source table, rename new table, re-create indexes
    db[f"{schema_a}.{table_a}"].drop()
    db.execute(f"ALTER TABLE {schema_a}.{temp_table} RENAME TO {table_a}")
    create_indexes(f"{schema_a}.{table_a}")
    db.execute(f"ALTER TABLE {schema_a}.{table_a} ADD PRIMARY KEY ({id_a})")
    # make sure the table gets analyzed before running this again
    conn = db.engine.raw_connection()
    conn.set_isolation_level(0)
    cur = conn.cursor()
    cur.execute(f"VACUUM ANALYZE {schema_a}.{table_a}")
Example #8
def test_create_table():
    db = connect(URL, schema="pgdata")
    columns = [
        Column('user_id', Integer, primary_key=True),
        Column('user_name', UnicodeText, nullable=False),
        Column('email_address', UnicodeText),
        Column('password', UnicodeText, nullable=False)
    ]
    employees = db.create_table("employees", columns)
    assert employees.table.exists()
Example #9
def union(db_url, in_table, columns, out_table):
    """Union/merge overlapping records with equivalent values for provided columns
    """
    db = pgdata.connect(db_url)
    sql = f"""CREATE TABLE {out_table} AS
             SELECT
               {columns},
               (ST_Dump(ST_Union(geom))).geom as geom
             FROM {in_table}
             GROUP BY {columns}
          """
    db.execute(sql)
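A minimal usage sketch (the table and column choices here are assumptions, not taken from the source):

# hypothetical call - merge overlapping designatedlands records that share
# the same designation and category values
union(config["db_url"], "designatedlands", "designation, category", "designatedlands_union")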
Example #10
def parallel_tiled(db_url, sql, tile, n_subs=1):
    """
    Create a connection and execute query for specified tile
    n_subs is the number of places in the sql query that should be
    substituted by the tile name
    """
    db = pgdata.connect(db_url, schema="designatedlands", multiprocessing=True)
    # As we are explicitly splitting up our job by tile and processing tiles
    # concurrently in individual connections we don't want the database to try
    # and manage parallel execution of these queries within these connections.
    # Turn off this connection's parallel execution:
    db.execute("SET max_parallel_workers_per_gather = 0")
    db.execute(sql, (tile + "%",) * n_subs)
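This helper is meant to be fanned out over tiles from a driver process; a sketch of that pattern (as in Example #42, assuming `sql` contains two tile placeholders and `tiles` is a list of tile names):

from functools import partial
import multiprocessing

# bind the connection string and query so each worker receives only a tile name
func = partial(parallel_tiled, config["db_url"], sql, n_subs=2)
pool = multiprocessing.Pool(processes=config["n_processes"])
pool.map(func, tiles)
pool.close()
pool.join()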
Example #11
def test_find_module_query():
    db = connect(URL)
    db.execute(db.queries['utmzen2bcalb'])
    sql = """SELECT
                routines.routine_name
             FROM information_schema.routines
             LEFT JOIN information_schema.parameters
               ON routines.specific_name=parameters.specific_name
             WHERE routines.specific_schema='public'
             AND routines.routine_name = 'utmzen2bcalb'
             ORDER BY routines.routine_name, parameters.ordinal_position;
          """
    assert db.query(sql).fetchone()['routine_name'] == 'utmzen2bcalb'
Example #13
def execute_parallel(sql, wsg):
    """Execute sql for specified wsg using a non-pooled, non-parallel conn
    """
    # specify multiprocessing when creating to disable connection pooling
    db = pgdata.connect(multiprocessing=True)
    conn = db.engine.raw_connection()
    cur = conn.cursor()
    # Turn off parallel execution for this connection, because we are
    # handling the parallelization ourselves
    cur.execute("SET max_parallel_workers_per_gather = 0")
    cur.execute(sql, (wsg, ))
    conn.commit()
    cur.close()
    conn.close()
Example #14
def create_indexes(table):
    """create usual fwa indexes
    """
    db = pgdata.connect()
    schema, table = db.parse_table_name(table)
    db.execute(f"""CREATE INDEX ON {schema}.{table} (linear_feature_id);
    CREATE INDEX ON {schema}.{table} (blue_line_key);
    CREATE INDEX ON {schema}.{table} (watershed_group_code);
    CREATE INDEX ON {schema}.{table} USING GIST (wscode_ltree);
    CREATE INDEX ON {schema}.{table} USING BTREE (wscode_ltree);
    CREATE INDEX ON {schema}.{table} USING GIST (localcode_ltree);
    CREATE INDEX ON {schema}.{table} USING BTREE (localcode_ltree);
    CREATE INDEX ON {schema}.{table} USING GIST (geom);
    """)
Example #15
def add_upstream_ids(table_a, id_a, table_b, id_b, upstream_ids_col):
    """note upstream ids
    """
    db = pgdata.connect()
    schema_a, table_a = db.parse_table_name(table_a)
    schema_b, table_b = db.parse_table_name(table_b)
    # ensure that any existing values get removed
    db.execute(
        f"ALTER TABLE {schema_a}.{table_a} DROP COLUMN IF EXISTS {upstream_ids_col}"
    )
    temp_table = table_a + "_tmp"
    db[f"{schema_a}.{temp_table}"].drop()
    db.execute(f"CREATE TABLE {schema_a}.{temp_table} (LIKE {schema_a}.{table_a})")
    db.execute(
        f"ALTER TABLE {schema_a}.{temp_table} ADD COLUMN {upstream_ids_col} integer[]"
    )
    groups = sorted(
        [
            g[0]
            for g in db.query(
                f"SELECT DISTINCT watershed_group_code from {schema_a}.{table_a}"
            )
        ]
    )
    query = sql.SQL(read_file("sql/00_add_upstream_ids.sql")).format(
        schema_a=sql.Identifier(schema_a),
        schema_b=sql.Identifier(schema_b),
        temp_table=sql.Identifier(temp_table),
        table_a=sql.Identifier(table_a),
        table_b=sql.Identifier(table_b),
        id_a=sql.Identifier(id_a),
        id_b=sql.Identifier(id_b),
        upstr_ids_col=sql.Identifier(upstream_ids_col),
    )
    # run each group in parallel
    func = partial(execute_parallel, query)
    n_processes = multiprocessing.cpu_count() - 1
    pool = multiprocessing.Pool(processes=n_processes)
    pool.map(func, groups)
    pool.close()
    pool.join()
    # drop source table, rename new table, re-create indexes
    db[f"{schema_a}.{table_a}"].drop()
    db.execute(f"ALTER TABLE {schema_a}.{temp_table} RENAME TO {table_a}")
    create_indexes(f"{schema_a}.{table_a}")
    db.execute(f"ALTER TABLE {schema_a}.{table_a} ADD PRIMARY KEY ({id_a})")
    conn = db.engine.raw_connection()
    conn.set_isolation_level(0)
    cur = conn.cursor()
    cur.execute(f"VACUUM ANALYZE {schema_a}.{table_a}")
Example #16
def report(point_table, point_id, barriers_table, dnstr_barriers_id):
    db = pgdata.connect()
    point_schema, point_table = db.parse_table_name(point_table)
    barriers_schema, barriers_table = db.parse_table_name(barriers_table)
    query = sql.SQL(read_file("sql/00_report.sql")).format(
        point_schema=sql.Identifier(point_schema),
        point_table=sql.Identifier(point_table),
        point_id=sql.Identifier(point_id),
        barriers_schema=sql.Identifier(barriers_schema),
        barriers_table=sql.Identifier(barriers_table),
        dnstr_barriers_id=sql.Identifier(dnstr_barriers_id),
    )
    conn = db.engine.raw_connection()
    cur = conn.cursor()
    cur.execute(query)
    conn.commit()
Example #17
def create_db():
    """Create a fresh database
    """
    util.log('Creating database %s' % config['db_url'])
    pgdata.create_db(config["db_url"])
    db = pgdata.connect(config["db_url"])
    db.execute("CREATE EXTENSION IF NOT EXISTS postgis")
    # installing the lostgis extension via pgxn does not work on windows -
    # prompt the user to add the lostgis functions manually, using the
    # provided .bat file as a reference
    if os.name == 'posix':
        db.execute("CREATE EXTENSION IF NOT EXISTS lostgis")
    else:
        util.log(
            'Remember to add required lostgis functions to your new database',
            level=30,
        )
        util.log('See scripts\\lostgis_windows.bat as a guide', level=30)
Example #18
def clip(db_url, in_table, clip_table, out_table):
    """Clip geometry of in_table by clip_table, writing output to out_table
    """
    db = pgdata.connect(db_url)
    columns = ", ".join(["a." + c for c in db[in_table].columns if c != "geom"])
    sql = f"""CREATE TABLE {out_table} AS
             SELECT
               {columns},
               CASE
                 WHEN ST_CoveredBy(a.geom, b.geom) THEN a.geom
                 ELSE ST_Multi(
                        ST_CollectionExtract(
                          ST_Intersection(a.geom,b.geom), 3)) END AS geom
             FROM {in_table} AS a
             INNER JOIN {clip_table} AS b
             ON ST_Intersects(a.geom, b.geom)
          """
    db.execute(sql)
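A minimal usage sketch (the output table name is an assumption):

# hypothetical call - clip the designatedlands layer to the bc_boundary polygons
clip(config["db_url"], "designatedlands", "bc_boundary", "designatedlands_bc")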
Example #19
def test_create_schema():
    db = connect(URL, schema="pgdata")
    db.create_schema("pgdata")
Example #20
def load(alias, force_download):
    """Download data, load to postgres
    """
    db = pgdata.connect(config["db_url"])
    sources = util.read_csv(config["source_csv"])
    # filter sources by the optionally provided alias, and drop excluded sources
    if alias:
        sources = [s for s in sources if s["alias"] == alias]
        if not sources:
            raise ValueError('Alias %s does not exist' % alias)

    sources = [s for s in sources if s["exclude"] != 'T']
    # process sources where automated downloads are available
    load_commands = []
    for source in [s for s in sources if s["manual_download"] != 'T']:
        # handle BCGW downloads
        if urlparse(source["url"]).hostname == 'catalogue.data.gov.bc.ca':
            file, layer = download.download_bcgw(
                source["url"],
                config["dl_path"],
                email=config["email"],
                force_download=force_download,
            )
        # handle all other downloads (zipfiles only)
        else:
            file, layer = download.download_non_bcgw(
                source['url'],
                config['dl_path'],
                source['file_in_url'],
                source['layer_in_file'],
                force_download=force_download,
            )
        load_commands.append(
            db.ogr2pg(
                file,
                in_layer=layer,
                out_layer=source["input_table"],
                sql=source["query"],
                cmd_only=True,
            ))
    # process manually downloaded sources
    for source in [s for s in sources if s["manual_download"] == 'T']:
        file = os.path.join(config['dl_path'], source["file_in_url"])
        if not os.path.exists(file):
            raise Exception(file + " does not exist, download it manually")

        load_commands.append(
            db.ogr2pg(
                file,
                in_layer=source['layer_in_file'],
                out_layer=source["input_table"],
                sql=source["query"],
                cmd_only=True,
            ))
    # run all ogr commands in parallel
    util.log('Loading source data to database.')
    # https://stackoverflow.com/questions/14533458/python-threading-multiple-bash-subprocesses
    processes = [subprocess.Popen(cmd, shell=True) for cmd in load_commands]
    for p in processes:
        p.wait()
    # log ogr statements for debugging
    #for cmd in load_commands:
    #    util.log(cmd)
    #    subprocess.call(cmd, shell=True)
    # create tiles layer
    util.log('Creating tiles layer')
    db.execute(db.queries["create_tiles"])
Example #21
def test_list_schema():
    db = connect(URL, schema="pgdata")
    assert set(["information_schema", "pgdata", "public"]) <= set(db.schemas)
Example #22
def dump_aggregate(config, new_layer_name):
    """
    UNSUPPORTED

    test aggregation of designatedlands over tile boundaries

    Output data is aggregated across map tiles to remove gaps introduced in
    tiling of the sources. Aggregation is by distinct 'designation' in the
    output layer, and is run separately for each designation for speed.
    To dump data aggregated by 'category' or some other field, build and run
    your own ogr2ogr command based on below queries.

    This command is unsupported: aggregation does not quite remove gaps across
    all records and is very slow. Use mapshaper to aggregate outputs from the
    dump command (convert to shapefile first), e.g.:

    $ mapshaper designatedlands.shp \
        -clean snap-interval=0.01 \
        -dissolve designatio copy-fields=category \
        -explode \
        -o dl_clean.shp
    """
    # config = util.read_config(config)
    db = pgdata.connect(config["db_url"], schema="public")
    util.log('Aggregating %s to %s' % (config['out_table'], new_layer_name))
    # find all non-null designations
    designations = [
        d for d in db[config['out_table']].distinct('designation') if d
    ]
    db[new_layer_name].drop()
    sql = """CREATE TABLE {new_layer_name} AS
             SELECT designation, category, bc_boundary, geom
             FROM {out_table}
             LIMIT 0""".format(
        new_layer_name=new_layer_name, out_table=config['out_table']
    )
    db.execute(sql)
    # iterate through designations to speed up the aggregation
    for designation in designations:
        util.log('Adding %s to %s' % (designation, new_layer_name))
        # dump records entirely within a tile
        sql = """
        INSERT INTO {new_layer_name} (designation, category, bc_boundary, geom)
        SELECT
          dl.designation,
          dl.category,
          dl.bc_boundary,
          dl.geom
        FROM {t} dl
        INNER JOIN tiles ON dl.map_tile = tiles.map_tile
        WHERE dl.designation = %s
        AND ST_Coveredby(dl.geom, ST_Buffer(tiles.geom, -.01))
        """.format(
            t=config['out_table'], new_layer_name=new_layer_name
        )
        db.execute(sql, (designation,))
        # aggregate cross-tile records
        # Notes:
        # - expansion/contraction buffering of 3mm to remove gaps between tiles
        # - ST_Buffer of 0 on ST_Collect is much faster than ST_Union
        # - ST_Collect is a bit less robust, it requires
        #   ST_RemoveRepeatedPoints to complete successfully on sources that
        #   appear to come from rasters (mineral_reserve, ogma_legal)
        sql = """
        INSERT INTO {new_layer_name} (designation, category, bc_boundary, geom)
        SELECT
          designation,
          category,
          bc_boundary,
          (ST_Dump(ST_Buffer(geom, -.003))).geom as geom
        FROM (
          SELECT
            dl.designation,
            dl.category,
            dl.bc_boundary,
            ST_Buffer(
              ST_RemoveRepeatedPoints(
                ST_SnapToGrid(
                  ST_Collect(
                    ST_Buffer(dl.geom, .003)), .001), .01), 0) as geom
          FROM {t} dl
        INNER JOIN tiles ON dl.map_tile = tiles.map_tile
        WHERE dl.designation = %s
        AND NOT ST_Coveredby(dl.geom, ST_Buffer(tiles.geom, -.01))
        GROUP BY dl.designation, dl.category, dl.bc_boundary) as foo
        """.format(
            t=config['out_table'], new_layer_name=new_layer_name
        )
        db.execute(sql, (designation,))
    util.log('Dumping %s to file %s' % (new_layer_name, config['out_file']))
    db.pg2ogr(
        "SELECT * from " + new_layer_name,
        config['out_format'],
        config['out_file'],
        new_layer_name,
    )
Example #23
def test_insert_one():
    db = connect(URL)
    table = db["pgdata.employees"]
    table.insert(DATA[0])
Example #24
def test_null_table():
    db = connect(URL)
    db["table_that_does_not_exist"].drop()
    assert db["table_that_does_not_exist"]._is_dropped is True
Example #25
def parallel_query(id):
    sql = "SELECT user_name FROM pgdata.employees WHERE user_id = %s"
    db = connect(URL, multiprocessing=True)
    db.engine.execute(sql, (id,))
Example #26
def test_find_sqlpath_query():
    db = connect(URL, sql_path='tests/sql')
    assert db.queries['test'] == "SELECT 1 as test"
Example #28
def test_query_keys():
    db = connect(URL)
    sql = "SELECT user_name FROM pgdata.employees WHERE user_id = %s"
    assert db.engine.execute(sql, (1,)).keys() == ['user_name']
Example #29
def test_query_params_1():
    db = connect(URL)
    sql = "SELECT user_name FROM pgdata.employees WHERE user_id = %s"
    r = db.query(sql, (1,)).fetchall()
    assert r[0][0] == 'Fred'
    assert r[0]["user_name"] == 'Fred'
Example #30
def test_build_query():
    db = connect(URL)
    sql = "SELECT $UserName FROM pgdata.employees WHERE $UserId = 1"
    lookup = {"UserName": "user_name", "UserId": "user_id"}
    new_sql = db.build_query(sql, lookup)
    assert new_sql == "SELECT user_name FROM pgdata.employees WHERE user_id = 1"
Example #31
def test_get_table_cross_schema():
    db = connect(URL)
    assert db["pgdata.employees"] is not None
Example #32
def test_distinct():
    db = connect(URL, schema="pgdata")
    users = [r[0] for r in db["employees"].distinct('user_name')]
    assert len(users) == 3
Example #33
def test_connect():
    db = connect(URL, schema="pgdata")
    assert db.url == "postgresql://*****:*****@localhost:5432/pgdata"
Example #34
def test_insert_many():
    db = connect(URL)
    table = db["pgdata.employees"]
    table.insert(DATA[1:])
Example #35
def test_drop_schema():
    db = connect(URL, schema="pgdata")
    db.drop_schema("pgdata", cascade=True)
Example #36
def test_create_index():
    db = connect(URL, schema="pgdata")
    indexname = 'employees_user_name_idx'
    db['employees'].create_index(['user_name'], indexname)
    indexes = db['employees'].indexes.keys()
    assert indexname in indexes
Example #38
    def __init__(self, config_file=None):

        LOG.info("Initializing designatedlands")

        # load default config
        self.config = DEFAULT_CONFIG.copy()

        # if provided with a config file, replace config values with those
        # present in the config file
        if config_file:
            if not os.path.exists(config_file):
                raise ConfigValueError(f"File {config_file} does not exist")
            self.read_config(config_file)

        # set default n_processes to the number of cores available minus one
        if self.config["n_processes"] == -1:
            self.config["n_processes"] = multiprocessing.cpu_count() - 1

        # don't try and use more cores than are available
        elif self.config["n_processes"] > multiprocessing.cpu_count():
            self.config["n_processes"] = multiprocessing.cpu_count()

        self.db = pgdata.connect(self.config["db_url"])
        self.db.ogr_string = f"PG:host={self.db.host} user={self.db.user} dbname={self.db.database} password={self.db.password} port={self.db.port}"

        # define valid restriction classes and assign raster values
        self.restriction_lookup = {
            "PROTECTED": 5,
            "FULL": 4,
            "HIGH": 3,
            "MEDIUM": 2,
            "LOW": 1,
            "NONE": 0,
        }
        # load sources from csv
        self.read_sources()

        # define bounds manually
        self.bounds = [273287.5, 367687.5, 1870687.5, 1735887.5]

        width = max(
            int(
                ceil(
                    (self.bounds[2] - self.bounds[0]) / float(self.config["resolution"])
                )
            ),
            1,
        )
        height = max(
            int(
                ceil(
                    (self.bounds[3] - self.bounds[1]) / float(self.config["resolution"])
                )
            ),
            1,
        )

        self.raster_profile = {
            "count": 1,
            "crs": "EPSG:3005",
            "width": width,
            "height": height,
            "transform": Affine(
                self.config["resolution"],
                0,
                self.bounds[0],
                0,
                -self.config["resolution"],
                self.bounds[3],
            ),
            "nodata": 255,
        }
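The width/height arithmetic above rounds the hard-coded BC Albers extent up to whole raster cells; a worked sketch assuming a hypothetical resolution of 100 m:

from math import ceil
from affine import Affine

bounds = [273287.5, 367687.5, 1870687.5, 1735887.5]  # xmin, ymin, xmax, ymax (EPSG:3005)
resolution = 100  # assumed cell size in metres

width = max(int(ceil((bounds[2] - bounds[0]) / float(resolution))), 1)   # 15974 cells
height = max(int(ceil((bounds[3] - bounds[1]) / float(resolution))), 1)  # 13682 cells

# top-left origin, negative y pixel size for a north-up raster
transform = Affine(resolution, 0, bounds[0], 0, -resolution, bounds[3])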
Example #40
def bc2pg(
    dataset,
    db_url,
    table,
    schema,
    query,
    pagesize,
    max_workers,
    dim,
    fid,
    verbose,
    quiet,
):
    """Download a DataBC WFS layer to postgres - an ogr2ogr wrapper.

     \b
      $ bcdata bc2pg bc-airports --db_url postgresql://postgres:postgres@localhost:5432/postgis

    The default target database can be specified by setting the $DATABASE_URL
    environment variable.
    https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls
    """

    # for this command, default to INFO level logging
    # (echo the ogr2ogr commands by default)
    verbosity = verbose - quiet
    log_level = max(10, 20 - 10 * verbosity)
    logging.basicConfig(stream=sys.stderr, level=log_level)
    log = logging.getLogger(__name__)

    src = bcdata.validate_name(dataset)
    src_schema, src_table = [i.lower() for i in src.split(".")]
    if not schema:
        schema = src_schema
    if not table:
        table = src_table
    # always upper
    if fid:
        fid = fid.upper()
    # create schema if it does not exist
    conn = pgdata.connect(db_url)
    if schema not in conn.schemas:
        click.echo("Schema {} does not exist, creating it".format(schema))
        conn.create_schema(schema)

    # build parameters for each required request
    param_dicts = bcdata.define_request(
        dataset, query=query, sortby=fid, pagesize=pagesize
    )

    # run the first request / load
    payload = urlencode(param_dicts[0], doseq=True)
    url = bcdata.WFS_URL + "?" + payload
    db = parse_db_url(db_url)
    db_string = "PG:host={h} user={u} dbname={db} password={pwd}".format(
        h=db["host"], u=db["user"], db=db["database"], pwd=db["password"]
    )

    # create the table
    command = [
        "ogr2ogr",
        "-lco",
        "OVERWRITE=YES",
        "-lco",
        "SCHEMA={}".format(schema),
        "-lco",
        "GEOMETRY_NAME=geom",
        "-f",
        "PostgreSQL",
        db_string,
        "-t_srs",
        "EPSG:3005",
        "-nln",
        table,
        url,
    ]
    if dim:
        command = command + ["-dim", dim]
    if fid:
        command = command + ["-lco", "FID={}".format(fid)]
    # for speed with big loads - unlogged, no spatial index
    if len(param_dicts) > 1:
        command = command + ["-lco", "UNLOGGED=ON"]
        command = command + ["-lco", "SPATIAL_INDEX=NONE"]
    log.info(" ".join(command))
    subprocess.run(command)

    # write to additional separate tables if the dataset is larger than 10k records
    if len(param_dicts) > 1:
        commands = []
        for n, paramdict in enumerate(param_dicts[1:]):
            # create table to load to (so types are identical)
            sql = """
            CREATE TABLE {schema}.{table}_{n}
            (LIKE {schema}.{table}
            INCLUDING ALL)
            """.format(
                schema=schema, table=table, n=str(n)
            )
            conn.execute(sql)
            payload = urlencode(paramdict, doseq=True)
            url = bcdata.WFS_URL + "?" + payload
            command = [
                "ogr2ogr",
                "-update",
                "-append",
                "-f",
                "PostgreSQL",
                db_string + " active_schema=" + schema,
                "-t_srs",
                "EPSG:3005",
                "-nln",
                table + "_" + str(n),
                url,
            ]
            if dim:
                command = command + ["-dim", dim]
            commands.append(command)

        # https://stackoverflow.com/questions/14533458
        pool = Pool(max_workers)
        with click.progressbar(
            pool.imap(partial(call), commands), length=len(param_dicts)
        ) as bar:
            for returncode in bar:
                if returncode != 0:
                    click.echo("Command failed: {}".format(returncode))

        # once loaded, combine & drop
        for n, _x in enumerate(param_dicts[1:]):
            sql = """INSERT INTO {schema}.{table} SELECT * FROM {schema}.{table}_{n}""".format(
                schema=schema, table=table, n=str(n)
            )
            conn.execute(sql)
            sql = "DROP TABLE {}.{}_{}".format(schema, table, n)
            conn.execute(sql)
        conn.execute("ALTER TABLE {}.{} SET LOGGED".format(schema, table))
        log.info("Indexing geometry")
        conn[schema + "." + table].create_index_geom()
        # deal with primary key - because we loaded to multiple tables,
        # ogc_fid is not unique
        if not fid:
            sql = "ALTER TABLE {}.{} DROP COLUMN ogc_fid".format(schema, table)
            conn.execute(sql)
            sql = "ALTER TABLE {}.{} ADD COLUMN ogc_fid SERIAL PRIMARY KEY".format(
                schema, table
            )
            conn.execute(sql)

    log.info(
        "Load of {} to {} in {} complete".format(src, schema + "." + table, db_url)
    )
Example #41
def test_tables_in_schema():
    db = connect(URL)
    tables = db.tables_in_schema("pgdata")
    assert set(tables) == set(["employees"])
Example #42
def process(resume, force_preprocess, tiles):
    """Create output designatedlands tables
    """
    db = pgdata.connect(config["db_url"], schema="public")
    # run required preprocessing, tile, attempt to clean inputs
    geoutil.preprocess(db, config['source_csv'], force=force_preprocess)
    geoutil.tile_sources(db, config['source_csv'], force=force_preprocess)
    geoutil.clean_and_agg_sources(db,
                                  config['source_csv'],
                                  force=force_preprocess)
    # parse the list of tiles
    tilelist = geoutil.parse_tiles(db, tiles)
    # create target tables if not resuming from a bailed process
    if not resume:
        # create output tables
        db.execute(
            db.build_query(
                db.queries["create_outputs_prelim"],
                {"table": config['out_table']},
            ))
    # filter sources - use only non-excluded sources with hierarchy > 0
    sources = [
        s for s in util.read_csv(config['source_csv'])
        if s['hierarchy'] != 0 and s["exclude"] != 'T'
    ]
    # To create output table with overlaps, combine all source data
    # (tiles argument does not apply, we could build a tile query string but
    # it seems unnecessary)
    for source in sources:
        util.log("Inserting %s into preliminary output overlap table" %
                 source["tiled_table"])
        sql = db.build_query(
            db.queries["populate_output_overlaps"],
            {
                "in_table": source["tiled_table"],
                "out_table": config['out_table'] + "_overlaps_prelim",
            },
        )
        db.execute(sql)
    # To create output table with no overlaps, more processing is required
    # In case of bailing during tests/development, `resume` option is available
    # to enable resumption of processing at specified hierarchy number
    if resume:
        p_sources = [s for s in sources if int(s["hierarchy"]) >= int(resume)]
    else:
        p_sources = sources
    # The tiles layer will fill in gaps between sources (so all BC is included
    # in output). To do this, first match schema of tiles to other sources
    db.execute("ALTER TABLE tiles ADD COLUMN IF NOT EXISTS id integer")
    db.execute("UPDATE tiles SET id = tile_id")
    db.execute("ALTER TABLE tiles ADD COLUMN IF NOT EXISTS designation text")
    # Next, add simple tiles layer definition to sources list
    p_sources.append({"cleaned_table": "tiles", "category": None})
    # iterate through all sources
    for source in p_sources:
        sql = db.build_query(
            db.queries["populate_output"],
            {
                "in_table": source["cleaned_table"],
                "out_table": config['out_table'] + "_prelim",
            },
        )
        # determine which specified tiles are present in source layer
        src_tiles = set(
            geoutil.get_tiles(db, source["cleaned_table"], tile_table='tiles'))
        if tilelist:
            tiles = set(tilelist) & src_tiles
        else:
            tiles = src_tiles
        if tiles:
            util.log("Inserting %s into preliminary output table" %
                     source["cleaned_table"])
            # for testing, run only one process and report on tile
            if config['n_processes'] == 1:
                for tile in tiles:
                    util.log(tile)
                    db.execute(sql, (tile + "%", ) * 2)
            else:
                func = partial(geoutil.parallel_tiled, db.url, sql, n_subs=2)
                pool = multiprocessing.Pool(processes=config['n_processes'])
                pool.map(func, tiles)
                pool.close()
                pool.join()
        else:
            util.log("No tiles to process")
    # create marine-terrestrial layer
    if 'bc_boundary' not in db.tables:
        geoutil.create_bc_boundary(db, config['n_processes'])

    # overlay output tables with marine-terrestrial definition
    for table in [config['out_table'], config['out_table'] + "_overlaps"]:
        util.log('Cutting %s with marine-terrestrial definition' % table)
        geoutil.intersect(
            db,
            table + "_prelim",
            "bc_boundary",
            table,
            config['n_processes'],
            tiles,
        )

    tidy_designations(db, sources, "cleaned_table", config['out_table'])
    tidy_designations(db, sources, "tiled_table",
                      config['out_table'] + "_overlaps")
Example #46
import os
import shutil
import tempfile
import unittest

import fiona

import pgdata


URL = 'postgresql://*****:*****@localhost:5432/pgdata'
DB = pgdata.connect(URL)
DB.execute('CREATE SCHEMA IF NOT EXISTS pgdata')


DATA_1 = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
AIRPORTS = os.path.join(DATA_1, 'bc_airports.json')

# also test a path with spaces
DATA_2 = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data space')
AIRPORTS_2 = os.path.join(DATA_2, 'bc_airports_one.json')


class ogrpg(unittest.TestCase):
    def setUp(self):
        self.tempdir = tempfile.mkdtemp()
        self.spaced_dir = tempfile.mkdtemp("spa ced")

    def test_ogr2pg(self):
        db = DB
        db.ogr2pg(AIRPORTS, in_layer='bc_airports', out_layer='bc_airports',
Example #48
def setup():
    create_db(URL)
    db = connect(URL)
    db.execute("CREATE EXTENSION IF NOT EXISTS postgis")