Example #1
def get_db_session():
    dataset = get_dataset_name()

    if dataset is None:
        return get_session()
    else:
        return get_session(database_config=("database_%s" % dataset))
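
Note: a minimal sketch of the get_session helper these examples all rely on, assuming it wraps a SQLAlchemy sessionmaker and that database_config names a configuration section resolving to a connection URL. The get_database_url lookup below is a hypothetical stand-in, not the actual TSX implementation.

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

import tsx.config

def get_session(database_config="database"):
    # database_config selects a named database configuration; the URL
    # lookup is a hypothetical placeholder for however TSX resolves it
    url = tsx.config.get_database_url(database_config)
    engine = create_engine(url)
    return sessionmaker(bind=engine)()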
Example #2
def process_database(commit=False):
    session = get_session()

    # This speeds up the t2_survey_site spatial query by a factor of about 6
    session.execute("SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED")

    if not (is_empty(session, "t2_processed_survey")
            and is_empty(session, "t2_survey_site")):
        log.error(
            "Existing outputs found - please drop and recreate these tables first: t2_processed_sighting, t2_processed_survey, t2_survey_site"
        )
        log.info("""Run:
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE t2_processed_sighting;
TRUNCATE t2_processed_survey;
TRUNCATE t2_survey_site;
SET FOREIGN_KEY_CHECKS = 1;
			""")
        return

    process_sites(session)
    process_grid(session)

    if commit:
        log.info("Committing changes")
        session.commit()
    else:
        log.info("Rolling back changes (dry-run only)")
        session.rollback()
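
Note: is_empty is not shown in the source. A plausible sketch, assuming it simply checks whether a table contains any rows:

def is_empty(session, table_name):
    # table_name is interpolated directly, so it must come from trusted
    # code, never from user input
    return session.execute("SELECT 1 FROM %s LIMIT 1" % table_name).fetchone() is None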
Example #3
def main():
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)-15s %(name)s %(levelname)-8s %(message)s')

    parser = argparse.ArgumentParser(
        description='Import grid cells from shapefile')
    parser.add_argument('filename',
                        type=str,
                        help='Shapefile containing grid cells')
    args = parser.parse_args()

    session = get_session()

    with open_shapefile(args.filename, 'EPSG:4326') as (shp, reproject):
        for feature in tqdm(shp):
            props = feature['properties']
            geometry = reproject(shape(feature['geometry']))

            session.execute(
                """INSERT INTO grid_cell (id, geometry)
				VALUES (:id, ST_GeomFromWKB(_BINARY :geometry_wkb))""", {
                    'id': props['GridID'],
                    'geometry_wkb': shapely.wkb.dumps(geometry)
                })

    session.commit()
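
Note: open_shapefile is an unshown helper. A rough sketch, assuming it opens the shapefile with fiona and yields the collection along with a function that reprojects shapely geometries into the requested CRS (mirroring the Transformer pattern used in example #13):

from contextlib import contextmanager

import fiona
import pyproj
from shapely.ops import transform

@contextmanager
def open_shapefile(filename, dest_crs):
    with fiona.open(filename) as shp:
        transformer = pyproj.Transformer.from_crs(
            pyproj.CRS.from_wkt(shp.crs_wkt), dest_crs, always_xy=True)
        def reproject(geom):
            # Apply the coordinate transformation to every vertex
            return transform(transformer.transform, geom)
        yield shp, reproject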
Example #4
def export_to_sqlite(source_id, path):
    session = get_session()
    dest_db = init_sqlite(path)
    # tables = [table for (table,) in session.execute("show tables").fetchall()]
    tables = [
        ("t1_survey", "source_id = :source_id"),
        ("t1_site", "source_id = :source_id"),
        ("t1_sighting",
         "survey_id IN (SELECT id FROM t1_survey WHERE source_id = :source_id)"
         ),
        ("taxon",
         "id IN (SELECT taxon_id FROM t1_sighting, t1_survey WHERE survey_id = t1_survey.id AND source_id = :source_id)"
         ), ("aggregated_by_month", "FALSE"), ("aggregated_by_year", "FALSE"),
        ("taxon_group", None), ("taxon_status", None), ("search_type", None),
        ("source", "id = :source_id"), ("t2_site", "FALSE"),
        ("data_source", "source_id = :source_id"), ("region", "FALSE"),
        ("unit", None), ("taxon_source_alpha_hull", "FALSE"),
        ("intensive_management", None), ("experimental_design_type", None),
        ("response_variable_type", None), ("monitoring_program", None)
    ]

    for table, where_clause in tables:
        # print(table)
        copy_table_schema(session, table, dest_db)
        copy_table_data(session, table, dest_db, where_clause,
                        {'source_id': source_id})

    return path
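
Note: copy_table_schema and copy_table_data are not shown. A sketch of copy_table_data, assuming dest_db is a plain sqlite3 connection returned by init_sqlite; a where_clause of None copies every row, while "FALSE" copies the schema only:

def copy_table_data(session, table, dest_db, where_clause, params):
    sql = "SELECT * FROM %s" % table
    if where_clause:
        sql += " WHERE %s" % where_clause
    result = session.execute(sql, params if where_clause else {})
    columns = list(result.keys())
    placeholders = ", ".join(["?"] * len(columns))
    dest_db.executemany(
        "INSERT INTO %s (%s) VALUES (%s)" % (table, ", ".join(columns), placeholders),
        [tuple(row) for row in result.fetchall()])
    dest_db.commit()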
Example #5
def process_database(species=None, commit=False):
    session = get_session()

    if species is None:
        taxa = [taxon_id for (taxon_id,) in session.execute("SELECT DISTINCT taxon_id FROM processing_method WHERE data_type = 2").fetchall()]
    else:
        taxa = [taxon_id for (taxon_id,) in session.execute(
                "SELECT DISTINCT taxon_id FROM processing_method, taxon WHERE taxon.id = taxon_id AND data_type = 2 AND spno IN %s" % sql_list_placeholder('species', species),
                sql_list_argument('species', species)
            ).fetchall()]

    create_region_lookup_table(session)

    log.info("Step 1/2: Monthly aggregation")

    fn = functools.partial(aggregate_by_month, commit=commit)

    for result, error in tqdm(run_parallel(fn, taxa), total=len(taxa)):
        pass

    log.info("Step 2/2: Yearly aggregation")

    fn = functools.partial(aggregate_by_year, commit=commit)

    for result, error in tqdm(run_parallel(fn, taxa), total=len(taxa)):
        pass

    cleanup_region_lookup_table(session)
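
Note: sql_list_placeholder and sql_list_argument appear throughout these examples but are never shown. A plausible pair, assuming they expand a Python list into numbered named bind parameters (the examples are inconsistent about whether the caller or the helper supplies the surrounding parentheses):

def sql_list_placeholder(name, values):
    # e.g. ('species', [7, 9]) -> ":species_0, :species_1"
    return ", ".join(":%s_%d" % (name, i) for i in range(len(values)))

def sql_list_argument(name, values):
    # e.g. ('species', [7, 9]) -> {'species_0': 7, 'species_1': 9}
    return dict(("%s_%d" % (name, i), v) for i, v in enumerate(values))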
Example #6
def process_database(species=None, commit=False):
    session = get_session()
    if species is None:
        taxa = [
            taxon_id for (taxon_id, ) in session.execute(
                "SELECT DISTINCT taxon_id FROM t1_sighting").fetchall()
        ]
    else:
        taxa = [
            taxon_id for (taxon_id, ) in session.execute(
                "SELECT DISTINCT taxon_id FROM t1_sighting, taxon WHERE taxon.id = taxon_id AND spno IN (%s)"
                % sql_list_placeholder('species', species),
                sql_list_argument('species', species)).fetchall()
        ]

    create_region_lookup_table(session)

    # Process in parallel
    tasks = [(taxon_id, commit) for taxon_id in taxa]

    log.info("Step 1/2: Monthly aggregation")

    for result, error in tqdm(run_parallel(aggregate_monthly, tasks),
                              total=len(tasks)):
        if error:
            print(error)

    log.info("Step 1/2: Yearly aggregation")

    for result, error in tqdm(run_parallel(aggregate_yearly, tasks),
                              total=len(tasks)):
        if error:
            print(error)

    cleanup_region_lookup_table(session)
Example #7
def main():
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)-15s %(name)s %(levelname)-8s %(message)s')

    parser = argparse.ArgumentParser(
        description='Import species range polygons into TSX database')
    parser.add_argument('dir',
                        type=str,
                        help='Directory containing species range shapefiles')
    args = parser.parse_args()

    session = get_session()

    filenames = [f for f in os.listdir(args.dir) if f.endswith('.shp')]

    for filename in tqdm(filenames):
        spno = int(filename[0:-4])
        try:
            # https://pyproj4.github.io/pyproj/stable/crs_compatibility.html#fiona
            with fiona.Env(OSR_WKT_FORMAT="WKT2_2018"), fiona.open(
                    os.path.join(args.dir, filename),
                    encoding='Windows-1252') as shp:
                process_shp(session, spno, shp)
        except KeyboardInterrupt:
            log.info("Aborting - no changes saved")
            return

    session.commit()
Example #8
def process_database(species=None, commit=False):
    session = get_session()

    # Just get taxa that have range polygons
    if species is None:
        taxa = session.execute(
            "SELECT DISTINCT taxon_id FROM taxon_range").fetchall()
    else:
        taxa = session.execute(
            "SELECT DISTINCT taxon_id FROM taxon_range, taxon WHERE taxon_id = taxon.id AND spno IN (%s)"
            % sql_list_placeholder('species', species),
            sql_list_argument('species', species)).fetchall()

    # Unwrap tuple
    taxa = [taxon_id for (taxon_id, ) in taxa]

    # Delete old results
    if commit and len(taxa) > 0:
        # session.execute("DELETE FROM t2_ultrataxon_sighting WHERE taxon_id IN (:taxa)", { 'taxa': taxa })
        session.execute(
            "DELETE FROM t2_ultrataxon_sighting WHERE taxon_id IN (%s)" %
            sql_list_placeholder('taxa', taxa),
            sql_list_argument('taxa', taxa))
        session.commit()

    # Process in parallel
    tasks = [(taxon_id, commit) for taxon_id in taxa]

    # This is important because we are about to spawn child processes, and this stops them attempting to share the
    # same database connection pool
    session.close()
    for result, error in tqdm(run_parallel(process_taxon, tasks),
                              total=len(tasks)):
        if error:
            print(error)
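
Note: run_parallel is the project's fan-out helper, also unshown. A simplified sketch capturing the contract the callers rely on: it maps fn over tasks (unpacking tuples as positional arguments) and yields (result, error) pairs instead of raising. This version uses a thread pool; the session.close() comment above suggests the real implementation can also spawn processes.

from multiprocessing.pool import ThreadPool

def run_parallel(fn, tasks):
    def call(task):
        try:
            args = task if isinstance(task, tuple) else (task,)
            return fn(*args), None
        except Exception as e:
            return None, e
    pool = ThreadPool()
    try:
        for pair in pool.imap_unordered(call, tasks):
            yield pair
    finally:
        pool.close()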
Example #9
File: misc.py Project: ElisaBayra/tsx
def get_subibra():
    session = get_session()
    rows = session.execute("""SELECT id, name, state FROM region""").fetchall()
    region_info = []
    for id, name, state in rows:
        region_info.append({'id': int(id), 'name': name, 'state': state})
    session.close()
    return jsonify(region_info)
Example #10
File: misc.py Project: ElisaBayra/tsx
def get_status():
    session = get_session()
    rows = session.execute(
        """SELECT id, description FROM taxon_status""").fetchall()
    res_info = []
    for id, description in rows:
        res_info.append({'id': int(id), 'description': description})
    session.close()
    return jsonify(res_info)
Example #11
File: misc.py Project: ElisaBayra/tsx
def get_responsevariable():
    session = get_session()
    rows = session.execute(
        """SELECT id, description FROM response_variable_type""").fetchall()
    res_info = []
    for id, description in rows:
        res_info.append({'id': int(id), 'description': description})
    session.close()
    return jsonify(res_info)
Example #12
File: misc.py Project: ElisaBayra/tsx
def get_searchtype():
    session = get_session()
    rows = session.execute(
        """SELECT id, description FROM search_type""").fetchall()
    searchtype_info = []
    for searchtype_id, description in rows:
        searchtype_info.append({'id': int(searchtype_id), 'name': description})
    session.close()
    return jsonify(searchtype_info)
Example #13
def process_database(species=None, commit=False):
    """
    Calculates spatial representativeness using alpha hulls

    Generates alpha hulls from each source x taxon combination

    Intersects alpha hulls with range layers, and then calculates percentage of range covered
    """
    session = get_session()

    if commit:
        if species is None:
            session.execute("DELETE FROM taxon_source_alpha_hull")
        else:
            session.execute(
                """DELETE FROM taxon_source_alpha_hull
                WHERE taxon_id IN (SELECT id FROM taxon WHERE spno IN (%s))"""
                % sql_list_placeholder('species', species),
                sql_list_argument('species', species))
        session.commit()

    # Load coastal shapefile
    coastal_shape_filename = tsx.config.config.get("processing.alpha_hull",
                                                   "coastal_shp")
    with fiona.Env(OSR_WKT_FORMAT="WKT2_2018"), fiona.open(
            coastal_shape_filename, 'r') as coastal_shape:
        # Convert from fiona dictionary to shapely geometry and reproject
        shp_to_working_transformer = pyproj.Transformer.from_proj(
            pyproj.CRS.from_wkt(coastal_shape.crs_wkt),
            working_proj,
            always_xy=True)
        coastal_shape = reproject(shape(coastal_shape[0]['geometry']),
                                  shp_to_working_transformer)
        # Simplify coastal boundary - makes things run ~20X faster
        log.info("Simplifying coastal boundary")
        coastal_shape = coastal_shape.buffer(10000).simplify(10000)

    log.info("Generating alpha shapes")

    for data_type in 1, 2:
        log.info("Processing type %s data" % data_type)

        taxa = get_taxa(session, data_type, species)

        tasks = [(taxon_id, coastal_shape, data_type, commit)
                 for taxon_id in taxa]

        # This is important because we are about to spawn child processes, and this stops them attempting to share the
        # same database connection pool
        session.close()  # TODO: not sure if this is needed now

        # Process all the species in parallel
        for result, error in tqdm(run_parallel(process, tasks),
                                  total=len(tasks)):
            if error:
                print(error)
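
Note: the two-argument reproject used here and in example #27 is a thin wrapper over shapely; a sketch, assuming the transformer was built with always_xy=True as above (example #26 shows an older three-argument variant built on pyproj.Proj):

from shapely.ops import transform

def reproject(geom, transformer):
    # Apply a pyproj.Transformer to every vertex of a shapely geometry
    return transform(transformer.transform, geom)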
Example #14
File: misc.py Project: ElisaBayra/tsx
def get_species():
    session = get_session()
    rows = session.execute(
        """SELECT spno, common_name FROM taxon ORDER BY common_name"""
    ).fetchall()
    species_info = []
    for spno, common_name in rows:
        species_info.append({'spno': int(spno), 'common_name': common_name})
    session.close()
    return jsonify(species_info)
Example #15
def main():
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)-15s %(name)s %(levelname)-8s %(message)s')

    parser = argparse.ArgumentParser(
        description='Import regions from shapefile')
    parser.add_argument('filename',
                        type=str,
                        help='Shapefile containing regions')
    args = parser.parse_args()

    session = get_session()

    session.execute("DELETE FROM t1_survey_region")

    with fiona.open(args.filename, encoding='Windows-1252') as shp:
        for index, feature in enumerate(tqdm(shp)):
            props = feature['properties']

            geometry = shape(
                transform_geom(shp.crs, 'EPSG:4326', feature['geometry']))
            geometry = geometry.buffer(0)

            session.execute(
                """INSERT INTO region (id, name, geometry, state, positional_accuracy_in_m)
					VALUES (:id, :name, ST_GeomFromWKB(_BINARY :geometry_wkb), :state, :positional_accuracy_in_m)""",
                {
                    'id': index,
                    'name': props['RegName'],
                    'geometry_wkb': shapely.wkb.dumps(
                        to_multipolygon(geometry)),
                    'state': props['StateName'],
                    'positional_accuracy_in_m': int(props['Accuracy'])
                })

            for geometry in subdivide_geometry(geometry):

                session.execute(
                    """INSERT INTO region_subdiv (id, name, geometry)
					VALUES (:id, :name, ST_GeomFromWKB(_BINARY :geometry_wkb))""", {
                        'id': index,
                        'name': props['RegName'],
                        'geometry_wkb': shapely.wkb.dumps(to_multipolygon(geometry))
                    })

    log.info("Updating t1_survey_region (this may take a while)")
    session.execute("CALL update_t1_survey_region(NULL)")

    session.commit()
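
Note: subdivide_geometry is not shown in the source. One plausible sketch, which recursively quarters a geometry's bounding box until each piece has few enough vertices for fast spatial queries (max_points mirrors the keyword used in examples #25 and #26):

from shapely.geometry import box

def count_points(geom):
    if hasattr(geom, 'geoms'):
        return sum(count_points(g) for g in geom.geoms)
    if geom.geom_type == 'Polygon':
        return len(geom.exterior.coords) + sum(len(r.coords) for r in geom.interiors)
    return len(getattr(geom, 'coords', []))

def subdivide_geometry(geom, max_points=100):
    if geom.is_empty:
        return
    if count_points(geom) <= max_points:
        yield geom
        return
    minx, miny, maxx, maxy = geom.bounds
    midx, midy = (minx + maxx) / 2, (miny + maxy) / 2
    quadrants = (box(minx, miny, midx, midy), box(midx, miny, maxx, midy),
                 box(minx, midy, midx, maxy), box(midx, midy, maxx, maxy))
    for quadrant in quadrants:
        for piece in subdivide_geometry(geom.intersection(quadrant), max_points):
            yield piece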
Example #16
def process_database():
    session = get_session()

    log.info("Step 1/2 - Building list of filtered time series")

    # Get year range
    min_year = tsx.config.config.getint("processing", "min_year")
    max_year = tsx.config.config.getint("processing", "max_year")
    min_tssy = tsx.config.config.getint("processing",
                                        "min_time_series_sample_years")

    session.execute(
        """CREATE TEMPORARY TABLE tmp_filtered_ts
		( INDEX (time_series_id) )
		SELECT time_series_id
		FROM aggregated_by_year agg
		INNER JOIN taxon ON agg.taxon_id = taxon.id
		LEFT JOIN data_source ON data_source.taxon_id = agg.taxon_id AND data_source.source_id = agg.source_id
		WHERE agg.start_date_y <= COALESCE(data_source.end_year, :max_year)
		AND agg.start_date_y >= COALESCE(data_source.start_year, :min_year)
		AND NOT data_source.exclude_from_analysis
		AND COALESCE(agg.search_type_id, 0) != 6
		AND COALESCE(taxon.max_status_id, 0) NOT IN (0,1,7)
		AND region_id IS NOT NULL
		AND COALESCE(data_source.data_agreement_id, -1) NOT IN (0)
		AND COALESCE(data_source.standardisation_of_method_effort_id, -1) NOT IN (0, 1)
		AND COALESCE(data_source.consistency_of_monitoring_id, -1) NOT IN (0, 1)
		AND experimental_design_type_id = 1
		GROUP BY agg.time_series_id
		HAVING MAX(value) > 0
		AND COUNT(DISTINCT start_date_y) >= :min_tssy;
	""", {
            'min_year': min_year,
            'max_year': max_year,
            'min_tssy': min_tssy
        })

    log.info("Step 2/2 - Updating aggregated_by_year table")

    session.execute(
        """UPDATE aggregated_by_year agg
		LEFT JOIN data_source ON data_source.taxon_id = agg.taxon_id AND data_source.source_id = agg.source_id
		SET agg.include_in_analysis =
			agg.time_series_id IN (SELECT time_series_id FROM tmp_filtered_ts)
			AND agg.start_date_y <= COALESCE(data_source.end_year, :max_year)
			AND agg.start_date_y >= COALESCE(data_source.start_year, :min_year)
	""", {
            'min_year': min_year,
            'max_year': max_year
        })

    session.execute("""DROP TABLE tmp_filtered_ts""")

    log.info("Done")
Example #17
def main():
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)-15s %(name)s %(levelname)-8s %(message)s')

    parser = argparse.ArgumentParser(description='Import incidental sightings')
    parser.add_argument('filename',
                        type=str,
                        help='Incidental sightings file (CSV)')
    args = parser.parse_args()

    session = get_session()

    spno_map = {
        spno: taxon_id
        for spno, taxon_id in session.execute("""SELECT spno, taxon.id
			FROM taxon, taxon_level
			WHERE taxon_level.id = taxon_level_id
			AND taxon_level.description = 'sp'""").fetchall()
    }

    session.execute("""DELETE FROM incidental_sighting""")

    with open(args.filename) as f:
        reader = csv.DictReader(f)
        row_count = sum(1 for row in reader)

    with open(args.filename) as f:
        reader = csv.DictReader(f)
        rows = []

        def flush_rows():
            # Guard against an empty batch, which would otherwise issue an
            # INSERT with no bind parameters
            if not rows:
                return
            session.execute(
                "INSERT INTO incidental_sighting (taxon_id, coords) VALUES (:taxon_id, ST_GeomFromWKB(_BINARY :coords))",
                rows)
            del rows[:]

        for row in tqdm(reader, total=row_count):
            spno = int(row['SpNo'])
            x = float(row['Longitude'])
            y = float(row['Latitude'])
            if spno in spno_map:
                rows.append({
                    'taxon_id': spno_map[spno],
                    'coords': shapely.wkb.dumps(Point(x, y))
                })
            if len(rows) > 4000:
                flush_rows()

        flush_rows()

    session.commit()
Example #18
File: importer.py Project: ElisaBayra/tsx
def test_db():
	"""
	Test the db connection
	"""
	print("Testing DB Connection")
	print()
	session = get_session()
	# list all units
	for u in session.query(Unit).all():
		print("%d: %s" % (u.id, u.description))
	print()
	print("DB Connection successful")
Example #19
def process_database(commit=False):
	session = get_session()

	if not (is_empty(session, "t2_processed_survey") and is_empty(session, "t2_survey_site")):
		log.error("Existing outputs found - please drop and recreate these tables first: t2_processed_sighting, t2_processed_survey, t2_survey_site")
		return

	process_sites(session)
	process_grid(session)

	if commit:
		log.info("Committing changes")
		session.commit()
	else:
		log.info("Rolling back changes (dry-run only)")
		session.rollback()
Example #20
	def get_taxon_sites(taxon_id):
		session = get_session()
		try:
			return session.execute("""SELECT DISTINCT t2_survey_site.site_id, taxon_id
				FROM taxon_presence_alpha_hull_subdiv alpha STRAIGHT_JOIN t2_survey USE INDEX (coords), t2_survey_site
				WHERE ST_Contains(alpha.geometry, t2_survey.coords)
				AND t2_survey_site.survey_id = t2_survey.id
				AND taxon_id = :taxon_id
				AND alpha.range_id = 1""", {
					'taxon_id': taxon_id
				}).fetchall()
		except Exception:
			log.exception("Exception getting sites for taxon")
			raise
		finally:
			session.close()
Example #21
File: process.py Project: nesp-tsr3-1/tsx
def clear_database():
    # Clears out all derived data from the database
    session = get_session()
    statements = [
        "SET FOREIGN_KEY_CHECKS = 0;", "TRUNCATE taxon_presence_alpha_hull;",
        "TRUNCATE taxon_presence_alpha_hull_subdiv;",
        "TRUNCATE t2_ultrataxon_sighting;", "TRUNCATE t2_processed_survey;",
        "TRUNCATE t2_processed_sighting;", "TRUNCATE t2_survey_site;",
        "TRUNCATE aggregated_by_year;", "TRUNCATE aggregated_by_month;",
        "SET FOREIGN_KEY_CHECKS = 1;"
    ]

    for sql in tqdm(statements):
        session.execute(sql)

    session.commit()
Example #22
File: misc.py Project: ElisaBayra/tsx
def get_source():
    session = get_session()
    rows = session.execute(
        """SELECT id, source_type_id, provider, description FROM source ORDER BY description"""
    ).fetchall()
    res_info = []
    for id, source_type_id, provider, description in rows:
        res_info.append({
            'id': int(id),
            'source_type_id': None if source_type_id is None else int(source_type_id),
            'provider': provider,
            'description': description
        })
    session.close()
    return jsonify(res_info)
Example #23
def main():
	logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(asctime)-15s %(name)s %(levelname)-8s %(message)s')

	parser = argparse.ArgumentParser(description='Import Type 2 sites from shapefile')
	parser.add_argument('filename', type=str, help='Shapefile containing type 2 site polygons')
	args = parser.parse_args()

	session = get_session()

	with fiona.open(args.filename, encoding = 'Windows-1252') as shp:
		reproject = reproject_fn(pyproj.Proj(shp.crs), pyproj.Proj('+init=EPSG:4326'))
		for feature in tqdm(shp):
			props = feature['properties']
			geometry = to_multipolygon(reproject(shape(feature['geometry'])))

			session.execute("""INSERT INTO t2_site (search_type_id, geometry)
				VALUES (:search_type_id, ST_GeomFromWKB(_BINARY :geometry_wkb))""", {
					'search_type_id': props['SiteType'],
					'geometry_wkb': shapely.wkb.dumps(geometry)
				})

	session.commit()
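
Note: to_multipolygon (used here and in examples #15 and #25-27) is also unshown; a sketch, assuming it coerces whatever shapely produces (a Polygon, a GeometryCollection from an intersection, or an empty result) into a MultiPolygon, discarding non-areal parts:

from shapely.geometry import MultiPolygon

def to_multipolygon(geom):
    if geom.geom_type == 'Polygon':
        return MultiPolygon([]) if geom.is_empty else MultiPolygon([geom])
    if geom.geom_type == 'MultiPolygon':
        return geom
    if hasattr(geom, 'geoms'):
        # GeometryCollection: keep only the polygonal parts
        polys = []
        for g in geom.geoms:
            polys.extend(to_multipolygon(g).geoms)
        return MultiPolygon(polys)
    return MultiPolygon([])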
Example #24
def get_intensity():
    filtered_data = get_filtered_data()
    if len(filtered_data) == 0:
        return json.dumps([])
    dat = filtered_data.to_dict()

    source = request.args.get('source', type=str)
    if source == 'lpi_wide_table':
        lats = dat['SurveysCentroidLatitude']
        longs = dat['SurveysCentroidLongitude']
        counts = dat['SurveyCount']
        ids = lats.keys()
        return json.dumps([[lats[id], longs[id], counts[id]] for id in ids])
    else:  # get it from database
        ids = dat['TimeSeriesID'].values()
        session = get_session()
        # str(tuple(ids)) would produce invalid SQL for a single id, e.g.
        # "(5,)", so build the IN (...) list explicitly
        id_list = ", ".join(str(int(i)) for i in ids)
        result = session.execute(
            """SELECT time_series_id, start_date_y as Year, ST_X(centroid_coords) as Latitude,
						ST_Y(centroid_coords) as Longitude, SUM(survey_count) as Count
						FROM aggregated_by_year
						WHERE include_in_analysis
						AND time_series_id in (%s)
						GROUP BY time_series_id, Year, Latitude, Longitude""" % id_list)
        values = pd.DataFrame.from_records(data=result.fetchall(),
                                           columns=result.keys()).to_dict()
        session.close()
        lats = values['Latitude']
        longs = values['Longitude']
        counts = values['Count']
        years = values['Year']
        timeSeriesIDs = {id: [] for id in ids}
        for k, v in values['time_series_id'].items():
            timeSeriesIDs[v].append(k)
        return json.dumps([[
            lats[v[0]], longs[v[0]], [[years[i], int(counts[i])] for i in v]
        ] for k, v in timeSeriesIDs.items() if len(v) > 0])
Example #25
    def process_spno(spno):
        session = get_session()
        try:
            # Get raw points from DB
            raw_points = get_species_points(session, spno)

            if len(raw_points) < 4:
                # Not enough points to create an alpha hull
                return

            # Read points from database
            points = [reproject(p, db_proj, working_proj) for p in raw_points]

            # Generate alpha shape
            alpha_shp = make_alpha_hull(
                points = points,
                coastal_shape = coastal_shape,
                thinning_distance = tsx.config.config.getfloat('processing.alpha_hull', 'thinning_distance'),
                alpha = tsx.config.config.getfloat('processing.alpha_hull', 'alpha'),
                hullbuffer_distance = tsx.config.config.getfloat('processing.alpha_hull', 'hullbuffer_distance'),
                isolatedbuffer_distance = tsx.config.config.getfloat('processing.alpha_hull', 'isolatedbuffer_distance'))

            # Convert back to DB projection
            alpha_shp = reproject(alpha_shp, working_proj, db_proj)

            # Clean up geometry
            alpha_shp = alpha_shp.buffer(0)

            # Get range polygons to intersect with alpha shape
            for taxon_id, range_id, breeding_range_id, geom_wkb in get_species_range_polygons(session, spno):
                # Intersect and insert into DB
                geom = shapely.wkb.loads(binascii.unhexlify(geom_wkb)).buffer(0)
                geom = to_multipolygon(geom.intersection(alpha_shp)) # slow
                if len(geom) > 0:
                    session.execute("""INSERT INTO taxon_presence_alpha_hull (taxon_id, range_id, breeding_range_id, geometry)
                        VALUES (:taxon_id, :range_id, :breeding_range_id, ST_GeomFromWKB(_BINARY :geom_wkb))""", {
                            'taxon_id': taxon_id,
                            'range_id': range_id,
                            'breeding_range_id': breeding_range_id,
                            'geom_wkb': shapely.wkb.dumps(geom)
                        }
                    )
                # We also subdivide the geometries into small pieces and insert this into the database. This allows for much faster
                # spatial queries in the database.
                for subgeom in subdivide_geometry(geom, max_points = 100):
                    session.execute("""INSERT INTO taxon_presence_alpha_hull_subdiv (taxon_id, range_id, breeding_range_id, geometry)
                        VALUES (:taxon_id, :range_id, :breeding_range_id, ST_GeomFromWKB(_BINARY :geom_wkb))""", {
                            'taxon_id': taxon_id,
                            'range_id': range_id,
                            'breeding_range_id': breeding_range_id,
                            'geom_wkb': shapely.wkb.dumps(subgeom)
                        }
                    )
            if commit:
                session.commit()

        except Exception:
            log.exception("Exception processing alpha hull")
            raise
        finally:
            session.close()
Example #26
def process_database(species=None, commit=False):
    """
    Generates alpha hulls from raw sighting data in the database

    Intersects alpha hulls with range layers, and inserts the result back into the database
    """
    session = get_session()

    if species is None:
        species = get_all_spno(session)

    if commit:
        for spno in species:
            session.execute("""
                DELETE FROM taxon_presence_alpha_hull
                WHERE taxon_id IN (SELECT id FROM taxon WHERE spno = :spno)
                """, { 'spno': spno })
            session.execute("""
                DELETE FROM taxon_presence_alpha_hull_subdiv
                WHERE taxon_id IN (SELECT id FROM taxon WHERE spno = :spno)
                """, { 'spno': spno })
        session.commit()

    db_proj = pyproj.Proj('+init=EPSG:4326') # Database always uses WGS84
    working_proj = pyproj.Proj('+init=EPSG:3112') # GDA94 / Geoscience Australia Lambert - so that we can buffer in metres

    # Load coastal shapefile
    coastal_shape_filename = tsx.config.config.get("processing.alpha_hull", "coastal_shp")
    with fiona.open(coastal_shape_filename, 'r') as coastal_shape:
        # Convert from fiona dictionary to shapely geometry and reproject
        coastal_shape = reproject(shape(coastal_shape[0]['geometry']), pyproj.Proj(coastal_shape.crs), working_proj)
        # Simplify coastal boundary - makes things run ~20X faster
        log.info("Simplifying coastal boundary")
        coastal_shape = coastal_shape.buffer(10000).simplify(10000)

    log.info("Generating alpha shapes")

    # Process a single species.
    # This gets run off the main thread.
    def process_spno(spno):
        session = get_session()
        try:
            # Get raw points from DB
            raw_points = get_species_points(session, spno)

            if len(raw_points) < 4:
                # Not enough points to create an alpha hull
                return

            # Read points from database
            points = [reproject(p, db_proj, working_proj) for p in raw_points]

            # Generate alpha shape
            alpha_shp = make_alpha_hull(
                points = points,
                coastal_shape = coastal_shape,
                thinning_distance = tsx.config.config.getfloat('processing.alpha_hull', 'thinning_distance'),
                alpha = tsx.config.config.getfloat('processing.alpha_hull', 'alpha'),
                hullbuffer_distance = tsx.config.config.getfloat('processing.alpha_hull', 'hullbuffer_distance'),
                isolatedbuffer_distance = tsx.config.config.getfloat('processing.alpha_hull', 'isolatedbuffer_distance'))

            # Convert back to DB projection
            alpha_shp = reproject(alpha_shp, working_proj, db_proj)

            # Clean up geometry
            alpha_shp = alpha_shp.buffer(0)

            # Get range polygons to intersect with alpha shape
            for taxon_id, range_id, breeding_range_id, geom_wkb in get_species_range_polygons(session, spno):
                # Intersect and insert into DB
                geom = shapely.wkb.loads(binascii.unhexlify(geom_wkb)).buffer(0)
                geom = to_multipolygon(geom.intersection(alpha_shp)) # slow
                if len(geom) > 0:
                    session.execute("""INSERT INTO taxon_presence_alpha_hull (taxon_id, range_id, breeding_range_id, geometry)
                        VALUES (:taxon_id, :range_id, :breeding_range_id, ST_GeomFromWKB(_BINARY :geom_wkb))""", {
                            'taxon_id': taxon_id,
                            'range_id': range_id,
                            'breeding_range_id': breeding_range_id,
                            'geom_wkb': shapely.wkb.dumps(geom)
                        }
                    )
                # We also subdivide the geometries into small pieces and insert this into the database. This allows for much faster
                # spatial queries in the database.
                for subgeom in subdivide_geometry(geom, max_points = 100):
                    session.execute("""INSERT INTO taxon_presence_alpha_hull_subdiv (taxon_id, range_id, breeding_range_id, geometry)
                        VALUES (:taxon_id, :range_id, :breeding_range_id, ST_GeomFromWKB(_BINARY :geom_wkb))""", {
                            'taxon_id': taxon_id,
                            'range_id': range_id,
                            'breeding_range_id': breeding_range_id,
                            'geom_wkb': shapely.wkb.dumps(subgeom)
                        }
                    )
            if commit:
                session.commit()

        except Exception:
            log.exception("Exception processing alpha hull")
            raise
        finally:
            session.close()

    # This is important because we are about to spawn child processes, and this stops them attempting to share the
    # same database connection pool
    session.close()
    tsx.db.connect.engine.dispose()
    # Process all the species in parallel
    for result, error in tqdm(run_parallel(process_spno, species, use_processes=True), total=len(species)):
        if error:
            print(error)
Example #27
def process(taxon_id, coastal_shape, data_type, commit):
    session = get_session()

    try:
        # Load core range geometry
        core_range_geom = reproject(
            get_core_range_geometry(session, taxon_id),
            to_working_transformer).buffer(0).intersection(coastal_shape)

        for source_id in get_source_ids(session, data_type, taxon_id):

            log.info("Processing taxon_id: %s, source_id: %s" %
                     (taxon_id, source_id))

            # Get raw points from DB
            raw_points = get_raw_points(session, data_type, taxon_id,
                                        source_id)

            empty = len(raw_points) < 4

            if empty:
                log.info(
                    "Taxon %s: not enough points to create alpha hull (%s)" %
                    (taxon_id, len(raw_points)))

            if not empty:
                # Read points from database
                points = [
                    reproject(p, to_working_transformer) for p in raw_points
                ]

                # Generate alpha shape
                alpha_shp = make_alpha_hull(
                    points=points,
                    coastal_shape=None,
                    thinning_distance=tsx.config.config.getfloat(
                        'processing.alpha_hull', 'thinning_distance'),
                    alpha=tsx.config.config.getfloat('processing.alpha_hull',
                                                     'alpha'),
                    hullbuffer_distance=tsx.config.config.getfloat(
                        'processing.alpha_hull', 'hullbuffer_distance'),
                    isolatedbuffer_distance=tsx.config.config.getfloat(
                        'processing.alpha_hull', 'isolatedbuffer_distance'))

                # Clean up geometry
                alpha_shp = alpha_shp.buffer(0)

                if core_range_geom.area == 0:
                    log.info("Core range geometry area is zero")
                    empty = True

                else:
                    # Intersect alpha hull with core range
                    intersected_alpha = to_multipolygon(
                        core_range_geom.intersection(alpha_shp))

                    empty = intersected_alpha.is_empty

            if empty:
                session.execute(
                    """INSERT INTO taxon_source_alpha_hull (source_id, taxon_id, data_type, core_range_area_in_m2, alpha_hull_area_in_m2)
                    VALUES (:source_id, :taxon_id, :data_type, 0, 0)""", {
                        'source_id': source_id,
                        'taxon_id': taxon_id,
                        'data_type': data_type
                    })
            else:
                session.execute(
                    """INSERT INTO taxon_source_alpha_hull (source_id, taxon_id, data_type, geometry, core_range_area_in_m2, alpha_hull_area_in_m2)
                    VALUES (:source_id, :taxon_id, :data_type, ST_GeomFromWKB(_BINARY :geom_wkb), :core_range_area, :alpha_hull_area)""",
                    {
                        'source_id': source_id,
                        'taxon_id': taxon_id,
                        'data_type': data_type,
                        'geom_wkb': shapely.wkb.dumps(
                            reproject(intersected_alpha, to_db_transformer)),
                        'core_range_area': core_range_geom.area,
                        'alpha_hull_area': intersected_alpha.area
                    })

            if commit:
                session.commit()

    except Exception:
        log.exception("Exception processing alpha hull")
        raise
    finally:
        session.close()
Example #28
def process_taxon(taxon_id, commit):
    try:
        session = get_session()

        taxon = session.query(Taxon).get(taxon_id)

        for range_id, breeding_range_id, geom_wkb in get_taxon_range_polygons(
                session, taxon.id):
            if not taxon.ultrataxon:
                log.info("Skipping range polygon for non-ultrataxon: %s" %
                         taxon.id)
                continue

            # tqdm.write("Taxon: %s, range: %s" % (taxon.id, range_id))

            cache = {}

            geom = shapely.wkb.loads(binascii.unhexlify(geom_wkb)).buffer(
                0)  # Ensure valid

            bounds_wkb = shapely.wkb.dumps(Polygon.from_bounds(*geom.bounds))

            # Get all sightings matching that taxon
            q = session.execute(
                """SELECT t2_sighting.id, ST_X(coords), ST_Y(coords)
                FROM t2_sighting, t2_survey
                WHERE t2_sighting.survey_id = t2_survey.id
                AND t2_sighting.taxon_id = :taxon_id
                AND t2_sighting.taxon_id NOT IN (SELECT taxon_id FROM t2_ultrataxon_sighting WHERE sighting_id = t2_sighting.id)
                AND MBRContains(ST_GeomFromWKB(_BINARY :bounds_wkb), coords) """,
                {
                    'taxon_id': taxon.id,
                    'bounds_wkb': bounds_wkb
                })

            records = []

            for sighting_id, x, y in q.fetchall():
                if point_intersects_geom(geom, x, y, cache):
                    records.append(
                        T2UltrataxonSighting(sighting_id=sighting_id,
                                             taxon_id=taxon.id,
                                             range_id=range_id,
                                             generated_subspecies=False))

            if taxon.taxon_level.description == 'ssp':
                # Get parent taxa (species) sightings
                q = session.execute(
                    """SELECT t2_sighting.id, ST_X(coords), ST_Y(coords)
                    FROM t2_sighting, t2_survey, taxon taxon_ssp, taxon taxon_sp
                    WHERE t2_sighting.survey_id = t2_survey.id
                    AND t2_sighting.taxon_id = taxon_sp.id
                    AND taxon_ssp.id NOT IN (SELECT taxon_id FROM t2_ultrataxon_sighting WHERE sighting_id = t2_sighting.id)
                    AND taxon_sp.taxon_level_id = (SELECT id FROM taxon_level WHERE description = 'sp')
                    AND taxon_sp.spno = taxon_ssp.spno
                    AND taxon_ssp.id = :taxon_id
                    AND MBRContains(ST_GeomFromWKB(_BINARY :bounds_wkb), coords) """,
                    {
                        'taxon_id': taxon.id,
                        'bounds_wkb': bounds_wkb
                    })

                for sighting_id, x, y in q.fetchall():
                    if point_intersects_geom(geom, x, y, cache):
                        records.append(
                            T2UltrataxonSighting(sighting_id=sighting_id,
                                                 taxon_id=taxon.id,
                                                 range_id=range_id,
                                                 generated_subspecies=True))

            session.bulk_save_objects(records)
        if commit:
            session.commit()
    except Exception:
        log.exception('Exception in range and ultrataxon processing')
        raise
    finally:
        session.close()
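
Note: point_intersects_geom is unshown. Given the per-polygon cache dict created above, a reasonable sketch memoises the point-in-polygon test by coordinate, since many sightings share identical survey coordinates:

from shapely.geometry import Point

def point_intersects_geom(geom, x, y, cache):
    key = (x, y)
    if key not in cache:
        cache[key] = geom.intersects(Point(x, y))
    return cache[key]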
Example #29
def build_filter_string():
    filter_str = ""
    #spno
    filters = []
    if request.args.has_key('spno'):
        _sp_no = request.args.get('spno', type=int)
        filters.append("SpNo=='%d'" % (_sp_no))
    if request.args.has_key('datatype'):
        _sp_no = request.args.get('datatype', type=int)
        filters.append("DataType=='%d'" % (_sp_no))
    # state
    if 'state' in request.args:
        _stateList = request.args.get('state', type=str).split('+')
        filters.append("(%s)" %
                       " or ".join(["State=='%s'" % s for s in _stateList]))
    # searchtypedesc
    if 'searchtype' in request.args:
        _search_type = request.args.get('searchtype', type=int)
        # find in database
        session = get_session()
        _search_type_desc = session.execute(
            """SELECT * FROM search_type WHERE id = :searchtypeid""", {
                'searchtypeid': _search_type
            }).fetchone()['description']
        filters.append("SearchTypeDesc=='%s'" % (_search_type_desc))
        session.close()
    # subibra
    if 'subibra' in request.args:
        _subibra = request.args.get('subibra', type=str)
        filters.append("SubIBRA=='%s'" % (_subibra))

    # sourceid
    if 'sourceid' in request.args:
        _sourceid = request.args.get('sourceid', type=int)
        filters.append("SourceID=='%d'" % (_sourceid))

    # Functional group
    if 'group' in request.args:
        _group = request.args.get('group', type=str)
        filters.append("FunctionalGroup=='%s'" % (_group))

    # functional subgroup
    if 'subgroup' in request.args:
        _subgroup = request.args.get('subgroup', type=str)
        filters.append("FunctionalSubGroup=='%s'" % (_subgroup))

    # status/statusauth
    if 'status' in request.args and 'statusauth' in request.args:
        # IUCN, EPBC, BirdLifeAustralia, Max
        _statusList = request.args.get('status', type=str).split('+')
        _statusauth = request.args.get('statusauth', type=str)
        filters.append("(%s)" % " or ".join(
            ["%sStatus=='%s'" % (_statusauth, s) for s in _statusList]))

    # national priority
    if 'priority' in request.args:
        filters.append("NationalPriorityTaxa=='%d'" %
                       (request.args.get('priority', type=int)))

    if len(filters) > 0:
        return " and ".join(filters)
    else:
        return None
Example #30
def get_stats(filtered_data):
    df = filtered_data

    years = [col for col in df.columns if col.isdigit()]
    int_years = [int(year) for year in years]

    year_df = df.loc[:, years] * 0 + int_years

    # Time series length
    ts_length = df['TimeSeriesLength']  # = year_df.max(axis=1) - year_df.min(axis=1) + 1

    # Time series sample years
    ts_years = df['TimeSeriesSampleYears']  # = (year_df * 0 + 1).sum(axis=1)

    n_sources = df['SourceDesc'].nunique()
    n_taxa = df['TaxonID'].nunique()

    grouped_by_taxon = df.groupby('TaxonID').agg({
        'TimeSeriesLength': np.mean,
        'SourceDesc': lambda x: x.nunique(),
        'TimeSeriesID': lambda x: x.nunique(),
        'SpatialRepresentativeness': np.mean
    }).rename(columns={
        'TimeSeriesLength': 'ts_length_mean',
        'SourceDesc': 'num_sources',
        'TimeSeriesID': 'num_ts',
        'SpatialRepresentativeness': 'spatial_rep'
    })

    session = get_session()
    result = session.execute(
        """SELECT
			id AS 'TaxonID',
			common_name,
			scientific_name,
			bird_group,
			bird_sub_group,
			(SELECT description FROM taxon_status WHERE taxon_status.id = aust_status_id) AS aust_status,
			(SELECT description FROM taxon_status WHERE taxon_status.id = epbc_status_id) AS epbc_status
		FROM taxon
		WHERE GREATEST(COALESCE(aust_status_id, 0), COALESCE(epbc_status_id, 0), COALESCE(iucn_status_id, 0)) NOT IN (0,1,7)
		AND (ultrataxon OR taxon.id IN :taxon_ids)""",
        {'taxon_ids': list(df['TaxonID'].unique())})
    session.close()

    all_taxa = pd.DataFrame.from_records(data=result.fetchall(),
                                         index='TaxonID',
                                         columns=result.keys())

    joined = all_taxa.join(grouped_by_taxon, how='outer').sort_values(
        ['bird_group', 'bird_sub_group', 'common_name'], na_position='first')
    joined = joined.reset_index().rename(columns={'TaxonID': 'taxon_id'})
    joined['spatial_rep'] *= 100

    taxa_with_data = joined.query('num_ts > 0')
    # num_ts != num_ts is True only for NaN, i.e. taxa with no time series
    taxa_without_data = joined.query('num_ts != num_ts').drop(
        columns=['ts_length_mean', 'num_sources', 'num_ts', 'spatial_rep'])

    return {
        'num_sources': df['SourceDesc'].nunique(),
        'num_taxa': df['TaxonID'].nunique(),
        'ts_length': {
            'mean': ts_length.mean(),
            'stddev': ts_length.std()
        },
        'ts_years': {
            'mean': ts_years.mean(),
            'stddev': ts_years.std()
        },
        'taxa_with_data': taxa_with_data.to_dict(orient='records'),
        'taxa_without_data': taxa_without_data.to_dict(orient='records')
    }