def get_db_session():
    dataset = get_dataset_name()
    if dataset is None:
        return get_session()
    else:
        return get_session(database_config=("database_%s" % dataset))

def process_database(commit=False):
    session = get_session()

    # This speeds up the t2_survey_site spatial query by a factor of about 6
    session.execute("SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED")

    if not (is_empty(session, "t2_processed_survey") and is_empty(session, "t2_survey_site")):
        log.error("Existing outputs found - please drop and recreate these tables first: t2_processed_sighting, t2_processed_survey, t2_survey_site")
        log.info("""Run:
            SET FOREIGN_KEY_CHECKS = 0;
            TRUNCATE t2_processed_sighting;
            TRUNCATE t2_processed_survey;
            TRUNCATE t2_survey_site;
            SET FOREIGN_KEY_CHECKS = 1;
        """)
        return

    process_sites(session)
    process_grid(session)

    if commit:
        log.info("Committing changes")
        session.commit()
    else:
        log.info("Rolling back changes (dry-run only)")
        session.rollback()

def main():
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)-15s %(name)s %(levelname)-8s %(message)s')

    parser = argparse.ArgumentParser(description='Import grid cells from shapefile')
    parser.add_argument('filename', type=str, help='Shapefile containing grid cells')
    args = parser.parse_args()

    session = get_session()

    with open_shapefile(args.filename, 'EPSG:4326') as (shp, reproject):
        for feature in tqdm(shp):
            props = feature['properties']
            geometry = reproject(shape(feature['geometry']))

            session.execute(
                """INSERT INTO grid_cell (id, geometry)
                   VALUES (:search_type_id, ST_GeomFromWKB(_BINARY :geometry_wkb))""",
                {
                    'search_type_id': props['GridID'],
                    'geometry_wkb': shapely.wkb.dumps(geometry)
                })

    session.commit()

def export_to_sqlite(source_id, path):
    session = get_session()
    dest_db = init_sqlite(path)

    # tables = [table for (table,) in session.execute("show tables").fetchall()]
    tables = [
        ("t1_survey", "source_id = :source_id"),
        ("t1_site", "source_id = :source_id"),
        ("t1_sighting", "survey_id IN (SELECT id FROM t1_survey WHERE source_id = :source_id)"),
        ("taxon", "id IN (SELECT taxon_id FROM t1_sighting, t1_survey WHERE survey_id = t1_survey.id AND source_id = :source_id)"),
        ("aggregated_by_month", "FALSE"),
        ("aggregated_by_year", "FALSE"),
        ("taxon_group", None),
        ("taxon_status", None),
        ("search_type", None),
        ("source", "id = :source_id"),
        ("t2_site", "FALSE"),
        ("data_source", "source_id = :source_id"),
        ("region", "FALSE"),
        ("unit", None),
        ("taxon_source_alpha_hull", "FALSE"),
        ("intensive_management", None),
        ("experimental_design_type", None),
        ("response_variable_type", None),
        ("monitoring_program", None)
    ]

    for table, where_clause in tables:
        # print(table)
        copy_table_schema(session, table, dest_db)
        copy_table_data(session, table, dest_db, where_clause, {'source_id': source_id})

    return path

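# Note: copy_table_schema and copy_table_data are project helpers not shown here. As a
# rough, hypothetical sketch only (name, signature and behaviour assumed, not the project's
# actual implementation), copying the rows could look something like this, given a sqlite3
# connection as dest_db and the where_clause/params pattern used above:
def copy_table_data_sketch(session, table, dest_db, where_clause, params):
    where_sql = ("WHERE " + where_clause) if where_clause else ""
    result = session.execute("SELECT * FROM %s %s" % (table, where_sql), params)
    rows = [tuple(row) for row in result.fetchall()]
    if rows:
        placeholders = ", ".join(["?"] * len(result.keys()))
        dest_db.executemany("INSERT INTO %s VALUES (%s)" % (table, placeholders), rows)
        dest_db.commit()
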
def process_database(species=None, commit=False):
    session = get_session()

    if species is None:
        taxa = [taxon_id for (taxon_id,) in session.execute(
            "SELECT DISTINCT taxon_id FROM processing_method WHERE data_type = 2").fetchall()]
    else:
        taxa = [taxon_id for (taxon_id,) in session.execute(
            "SELECT DISTINCT taxon_id FROM processing_method, taxon WHERE taxon.id = taxon_id AND data_type = 2 AND spno IN (%s)" % sql_list_placeholder('species', species),
            sql_list_argument('species', species)).fetchall()]

    create_region_lookup_table(session)

    log.info("Step 1/2: Monthly aggregation")
    fn = functools.partial(aggregate_by_month, commit=commit)
    for result, error in tqdm(run_parallel(fn, taxa), total=len(taxa)):
        pass

    log.info("Step 2/2: Yearly aggregation")
    fn = functools.partial(aggregate_by_year, commit=commit)
    for result, error in tqdm(run_parallel(fn, taxa), total=len(taxa)):
        pass

    cleanup_region_lookup_table(session)

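# The sql_list_placeholder/sql_list_argument helpers used above (and in several functions
# below) are defined elsewhere in the codebase. A minimal sketch of the assumed idea -
# expanding a Python list into named bind parameters rather than string-interpolating the
# values into the SQL (sketch names are illustrative only):
def sql_list_placeholder_sketch(name, items):
    # e.g. ('species', [1, 2, 3]) -> ":species_0, :species_1, :species_2"
    return ", ".join(":%s_%d" % (name, i) for i in range(len(items)))

def sql_list_argument_sketch(name, items):
    # e.g. ('species', [1, 2, 3]) -> {'species_0': 1, 'species_1': 2, 'species_2': 3}
    return {"%s_%d" % (name, i): item for i, item in enumerate(items)}
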
def process_database(species=None, commit=False):
    session = get_session()

    if species is None:
        taxa = [taxon_id for (taxon_id,) in session.execute(
            "SELECT DISTINCT taxon_id FROM t1_sighting").fetchall()]
    else:
        taxa = [taxon_id for (taxon_id,) in session.execute(
            "SELECT DISTINCT taxon_id FROM t1_sighting, taxon WHERE taxon.id = taxon_id AND spno IN (%s)" % sql_list_placeholder('species', species),
            sql_list_argument('species', species)).fetchall()]

    create_region_lookup_table(session)

    # Process in parallel
    tasks = [(taxon_id, commit) for taxon_id in taxa]

    log.info("Step 1/2: Monthly aggregation")
    for result, error in tqdm(run_parallel(aggregate_monthly, tasks), total=len(tasks)):
        if error:
            print(error)

    log.info("Step 2/2: Yearly aggregation")
    for result, error in tqdm(run_parallel(aggregate_yearly, tasks), total=len(tasks)):
        if error:
            print(error)

    cleanup_region_lookup_table(session)

def main():
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)-15s %(name)s %(levelname)-8s %(message)s')

    parser = argparse.ArgumentParser(description='Import species range polygons into TSX database')
    parser.add_argument('dir', type=str, help='Directory containing species range shapefiles')
    args = parser.parse_args()

    session = get_session()

    filenames = [f for f in os.listdir(args.dir) if f.endswith('.shp')]

    for filename in tqdm(filenames):
        spno = int(filename[0:-4])
        try:
            # https://pyproj4.github.io/pyproj/stable/crs_compatibility.html#fiona
            with fiona.Env(OSR_WKT_FORMAT="WKT2_2018"), fiona.open(
                    os.path.join(args.dir, filename), encoding='Windows-1252') as shp:
                process_shp(session, spno, shp)
        except KeyboardInterrupt:
            log.info("Aborting - no changes saved")
            return

    session.commit()

def process_database(species=None, commit=False):
    session = get_session()

    # Just get taxa that have range polygons
    if species is None:
        taxa = session.execute("SELECT DISTINCT taxon_id FROM taxon_range").fetchall()
    else:
        taxa = session.execute(
            "SELECT DISTINCT taxon_id FROM taxon_range, taxon WHERE taxon_id = taxon.id AND spno IN (%s)" % sql_list_placeholder('species', species),
            sql_list_argument('species', species)).fetchall()

    # Unwrap tuple
    taxa = [taxon_id for (taxon_id,) in taxa]

    # Delete old results
    if commit and len(taxa) > 0:
        # session.execute("DELETE FROM t2_ultrataxon_sighting WHERE taxon_id IN (:taxa)", { 'taxa': taxa })
        session.execute(
            "DELETE FROM t2_ultrataxon_sighting WHERE taxon_id IN (%s)" % sql_list_placeholder('taxa', taxa),
            sql_list_argument('taxa', taxa))
        session.commit()

    # Process in parallel
    tasks = [(taxon_id, commit) for taxon_id in taxa]

    # This is important because we are about to spawn child processes, and this stops them attempting to share the
    # same database connection pool
    session.close()

    for result, error in tqdm(run_parallel(process_taxon, tasks), total=len(tasks)):
        if error:
            print(error)

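# run_parallel is another shared helper; the loops above assume it yields (result, error)
# pairs for each task. A simplified, hypothetical sketch of that contract using a thread
# pool (the real helper may use worker processes and differ in detail, which is why the
# session is closed before spawning):
from concurrent.futures import ThreadPoolExecutor, as_completed

def run_parallel_sketch(fn, tasks, workers=4):
    def call(task):
        args = task if isinstance(task, tuple) else (task,)
        try:
            return fn(*args), None
        except Exception as e:
            return None, e
    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [executor.submit(call, task) for task in tasks]
        for future in as_completed(futures):
            yield future.result()
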
def get_subibra():
    session = get_session()
    rows = session.execute("""SELECT id, name, state FROM region""").fetchall()
    region_info = []
    for id, name, state in rows:
        region_info.append({'id': int(id), 'name': name, 'state': state})
    session.close()
    return jsonify(region_info)

def get_status():
    session = get_session()
    rows = session.execute("""SELECT id, description FROM taxon_status""").fetchall()
    res_info = []
    for id, description in rows:
        res_info.append({'id': int(id), 'description': description})
    session.close()
    return jsonify(res_info)

def get_responsevariable():
    session = get_session()
    rows = session.execute("""SELECT id, description FROM response_variable_type""").fetchall()
    res_info = []
    for id, description in rows:
        res_info.append({'id': int(id), 'description': description})
    session.close()
    return jsonify(res_info)

def get_searchtype():
    session = get_session()
    rows = session.execute("""SELECT id, description FROM search_type""").fetchall()
    searchtype_info = []
    for searchtype_id, name in rows:
        searchtype_info.append({'id': int(searchtype_id), 'name': name})
    session.close()
    return jsonify(searchtype_info)

def process_database(species=None, commit=False):
    """
    Calculates spatial representativeness using alpha hulls

    Generates alpha hulls from each source x taxon combination

    Intersects alpha hulls with range layers, and then calculates percentage of range covered
    """
    session = get_session()

    if commit:
        if species is None:
            session.execute("DELETE FROM taxon_source_alpha_hull")
        else:
            session.execute(
                """DELETE FROM taxon_source_alpha_hull WHERE taxon_id IN
                   (SELECT id FROM taxon WHERE spno IN (%s))""" % sql_list_placeholder('species', species),
                sql_list_argument('species', species))
        session.commit()

    # Load coastal shapefile
    coastal_shape_filename = tsx.config.config.get("processing.alpha_hull", "coastal_shp")
    with fiona.Env(OSR_WKT_FORMAT="WKT2_2018"), fiona.open(coastal_shape_filename, 'r') as coastal_shape:
        # Convert from fiona dictionary to shapely geometry and reproject
        shp_to_working_transformer = pyproj.Transformer.from_proj(
            pyproj.CRS.from_wkt(coastal_shape.crs_wkt), working_proj, always_xy=True)
        coastal_shape = reproject(shape(coastal_shape[0]['geometry']), shp_to_working_transformer)

    # Simplify coastal boundary - makes things run ~20X faster
    log.info("Simplifying coastal boundary")
    coastal_shape = coastal_shape.buffer(10000).simplify(10000)

    log.info("Generating alpha shapes")

    for data_type in 1, 2:
        log.info("Processing type %s data" % data_type)

        taxa = get_taxa(session, data_type, species)

        tasks = [(taxon_id, coastal_shape, data_type, commit) for taxon_id in taxa]

        # This is important because we are about to spawn child processes, and this stops them attempting to share the
        # same database connection pool
        session.close()  # TODO: not sure if this is needed now

        # Process all the species in parallel
        for result, error in tqdm(run_parallel(process, tasks), total=len(tasks)):
            if error:
                print(error)

def get_species():
    session = get_session()
    rows = session.execute(
        """SELECT spno, common_name FROM taxon ORDER BY common_name""").fetchall()
    species_info = []
    for spno, common_name in rows:
        species_info.append({'spno': int(spno), 'common_name': common_name})
    session.close()
    return jsonify(species_info)

def main():
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)-15s %(name)s %(levelname)-8s %(message)s')

    parser = argparse.ArgumentParser(description='Import regions from shapefile')
    parser.add_argument('filename', type=str, help='Shapefile containing regions')
    args = parser.parse_args()

    session = get_session()
    session.execute("DELETE FROM t1_survey_region")

    with fiona.open(args.filename, encoding='Windows-1252') as shp:
        for index, feature in enumerate(tqdm(shp)):
            props = feature['properties']
            geometry = shape(transform_geom(shp.crs, 'EPSG:4326', feature['geometry']))
            geometry = geometry.buffer(0)

            session.execute(
                """INSERT INTO region (id, name, geometry, state, positional_accuracy_in_m)
                   VALUES (:id, :name, ST_GeomFromWKB(_BINARY :geometry_wkb), :state, :positional_accuracy_in_m)""",
                {
                    'id': index,
                    'name': props['RegName'],
                    'geometry_wkb': shapely.wkb.dumps(to_multipolygon(geometry)),
                    'state': props['StateName'],
                    'positional_accuracy_in_m': int(props['Accuracy'])
                })

            for geometry in subdivide_geometry(geometry):
                session.execute(
                    """INSERT INTO region_subdiv (id, name, geometry)
                       VALUES (:id, :name, ST_GeomFromWKB(_BINARY :geometry_wkb))""",
                    {
                        'id': index,
                        'name': props['RegName'],
                        'geometry_wkb': shapely.wkb.dumps(to_multipolygon(geometry))
                    })

    log.info("Updating t1_survey_region (this may take a while)")
    session.execute("CALL update_t1_survey_region(NULL)")

    session.commit()

def process_database():
    session = get_session()

    log.info("Step 1/2 - Building list of filtered time series")

    # Get year range
    min_year = tsx.config.config.getint("processing", "min_year")
    max_year = tsx.config.config.getint("processing", "max_year")
    min_tssy = tsx.config.config.getint("processing", "min_time_series_sample_years")

    session.execute(
        """CREATE TEMPORARY TABLE tmp_filtered_ts ( INDEX (time_series_id) )
            SELECT time_series_id
            FROM aggregated_by_year agg
            INNER JOIN taxon ON agg.taxon_id = taxon.id
            LEFT JOIN data_source ON data_source.taxon_id = agg.taxon_id AND data_source.source_id = agg.source_id
            WHERE agg.start_date_y <= COALESCE(data_source.end_year, :max_year)
            AND agg.start_date_y >= COALESCE(data_source.start_year, :min_year)
            AND NOT data_source.exclude_from_analysis
            AND COALESCE(agg.search_type_id, 0) != 6
            AND COALESCE(taxon.max_status_id, 0) NOT IN (0,1,7)
            AND region_id IS NOT NULL
            AND COALESCE(data_source.data_agreement_id, -1) NOT IN (0)
            AND COALESCE(data_source.standardisation_of_method_effort_id, -1) NOT IN (0, 1)
            AND COALESCE(data_source.consistency_of_monitoring_id, -1) NOT IN (0, 1)
            AND experimental_design_type_id = 1
            GROUP BY agg.time_series_id
            HAVING MAX(value) > 0
            AND COUNT(DISTINCT start_date_y) >= :min_tssy;
        """, {
            'min_year': min_year,
            'max_year': max_year,
            'min_tssy': min_tssy
        })

    log.info("Step 2/2 - Updating aggregated_by_year table")

    session.execute(
        """UPDATE aggregated_by_year agg
            LEFT JOIN data_source ON data_source.taxon_id = agg.taxon_id AND data_source.source_id = agg.source_id
            SET agg.include_in_analysis =
                agg.time_series_id IN (SELECT time_series_id FROM tmp_filtered_ts)
                AND agg.start_date_y <= COALESCE(data_source.end_year, :max_year)
                AND agg.start_date_y >= COALESCE(data_source.start_year, :min_year)
        """, {
            'min_year': min_year,
            'max_year': max_year
        })

    session.execute("""DROP TABLE tmp_filtered_ts""")

    log.info("Done")

def main():
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)-15s %(name)s %(levelname)-8s %(message)s')

    parser = argparse.ArgumentParser(description='Import incidental sightings')
    parser.add_argument('filename', type=str, help='Incidental sightings file (CSV)')
    args = parser.parse_args()

    session = get_session()

    spno_map = {
        spno: taxon_id
        for spno, taxon_id in session.execute(
            """SELECT spno, taxon.id
               FROM taxon, taxon_level
               WHERE taxon_level.id = taxon_level_id
               AND taxon_level.description = 'sp'""").fetchall()
    }

    session.execute("""DELETE FROM incidental_sighting""")

    with open(args.filename) as f:
        reader = csv.DictReader(f)
        row_count = sum(1 for row in reader)

    with open(args.filename) as f:
        reader = csv.DictReader(f)

        rows = []

        def flush_rows():
            session.execute(
                "INSERT INTO incidental_sighting (taxon_id, coords) VALUES (:taxon_id, ST_GeomFromWKB(_BINARY :coords))",
                rows)
            del rows[:]

        for row in tqdm(reader, total=row_count):
            spno = int(row['SpNo'])
            x = float(row['Longitude'])
            y = float(row['Latitude'])
            if spno in spno_map:
                rows.append({
                    'taxon_id': spno_map[spno],
                    'coords': shapely.wkb.dumps(Point(x, y))
                })
            if len(rows) > 4000:
                flush_rows()

        flush_rows()

    session.commit()

def test_db():
    """
    Test the db connection
    """
    print("Testing DB Connection")
    print()

    session = get_session()

    # list all units
    for u in session.query(Unit).all():
        print("%d: %s" % (u.id, u.description))

    print()
    print("DB Connection successful")

def process_database(commit=False):
    session = get_session()

    if not (is_empty(session, "t2_processed_survey") and is_empty(session, "t2_survey_site")):
        log.error("Existing outputs found - please drop and recreate these tables first: t2_processed_sighting, t2_processed_survey, t2_survey_site")
        return

    process_sites(session)
    process_grid(session)

    if commit:
        log.info("Committing changes")
        session.commit()
    else:
        log.info("Rolling back changes (dry-run only)")
        session.rollback()

def get_taxon_sites(taxon_id):
    session = get_session()
    try:
        return session.execute(
            """SELECT DISTINCT t2_survey_site.site_id, taxon_id
               FROM taxon_presence_alpha_hull_subdiv alpha
               STRAIGHT_JOIN t2_survey USE INDEX (coords), t2_survey_site
               WHERE ST_Contains(alpha.geometry, t2_survey.coords)
               AND t2_survey_site.survey_id = t2_survey.id
               AND taxon_id = :taxon_id
               AND alpha.range_id = 1""",
            {'taxon_id': taxon_id}).fetchall()
    except:
        log.exception("Exception getting sites for taxon")
        raise
    finally:
        session.close()

def clear_database():
    # Clears out all derived data from the database
    session = get_session()
    statements = [
        "SET FOREIGN_KEY_CHECKS = 0;",
        "TRUNCATE taxon_presence_alpha_hull;",
        "TRUNCATE taxon_presence_alpha_hull_subdiv;",
        "TRUNCATE t2_ultrataxon_sighting;",
        "TRUNCATE t2_processed_survey;",
        "TRUNCATE t2_processed_sighting;",
        "TRUNCATE t2_survey_site;",
        "TRUNCATE aggregated_by_year;",
        "TRUNCATE aggregated_by_month;",
        "SET FOREIGN_KEY_CHECKS = 1;"
    ]
    for sql in tqdm(statements):
        session.execute(sql)
    session.commit()

def get_source():
    session = get_session()
    rows = session.execute(
        """SELECT id, source_type_id, provider, description FROM source ORDER BY description""").fetchall()
    res_info = []
    for id, source_type_id, provider, description in rows:
        res_info.append({
            'id': int(id),
            'source_type_id': None if source_type_id is None else int(source_type_id),
            'provider': provider,
            'description': description
        })
    session.close()
    return jsonify(res_info)

def main():
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(asctime)-15s %(name)s %(levelname)-8s %(message)s')

    parser = argparse.ArgumentParser(description='Import Type 2 sites from shapefile')
    parser.add_argument('filename', type=str, help='Shapefile containing type 2 site polygons')
    args = parser.parse_args()

    session = get_session()

    with fiona.open(args.filename, encoding='Windows-1252') as shp:
        reproject = reproject_fn(pyproj.Proj(shp.crs), pyproj.Proj('+init=EPSG:4326'))

        for feature in tqdm(shp):
            props = feature['properties']
            geometry = to_multipolygon(reproject(shape(feature['geometry'])))
            session.execute(
                """INSERT INTO t2_site (search_type_id, geometry)
                   VALUES (:search_type_id, ST_GeomFromWKB(_BINARY :geometry_wkb))""",
                {
                    'search_type_id': props['SiteType'],
                    'geometry_wkb': shapely.wkb.dumps(geometry)
                })

    session.commit()

def get_intensity():
    filtered_data = get_filtered_data()
    if len(filtered_data) == 0:
        return json.dumps([])

    dat = filtered_data.to_dict()
    source = request.args.get('source', type=str)

    if source == 'lpi_wide_table':
        lats = dat['SurveysCentroidLatitude']
        longs = dat['SurveysCentroidLongitude']
        counts = dat['SurveyCount']
        ids = lats.keys()
        return json.dumps([[lats[id], longs[id], counts[id]] for id in ids])
    else:
        # get it from database
        ids = dat['TimeSeriesID'].values()
        session = get_session()
        result = session.execute(
            """SELECT time_series_id, start_date_y as Year,
                ST_X(centroid_coords) as Latitude, ST_Y(centroid_coords) as Longitude,
                SUM(survey_count) as Count
                FROM aggregated_by_year
                WHERE include_in_analysis
                AND time_series_id in %s
                GROUP BY time_series_id, Year, Latitude, Longitude""" % str(tuple(ids)))
        values = pd.DataFrame.from_records(data=result.fetchall(), columns=result.keys()).to_dict()
        session.close()

        lats = values['Latitude']
        longs = values['Longitude']
        counts = values['Count']
        years = values['Year']

        # Group result rows by time series
        timeSeriesIDs = {id: [] for id in ids}
        for k, v in values['time_series_id'].items():
            timeSeriesIDs[v].append(k)

        # Returns [[lat, long, [[year, count], ...]], ...], one entry per time series
        return json.dumps([
            [lats[v[0]], longs[v[0]], [[years[i], int(counts[i])] for i in v]]
            for k, v in timeSeriesIDs.items() if len(v) > 0
        ])

def process_database(species=None, commit=False):
    """
    Generates alpha hulls from raw sighting data in the database

    Intersects alpha hulls with range layers, and inserts the result back into the database
    """
    session = get_session()

    if species is None:
        species = get_all_spno(session)

    if commit:
        for spno in species:
            session.execute("""
                DELETE FROM taxon_presence_alpha_hull
                WHERE taxon_id IN (SELECT id FROM taxon WHERE spno = :spno)
                """, {'spno': spno})
            session.execute("""
                DELETE FROM taxon_presence_alpha_hull_subdiv
                WHERE taxon_id IN (SELECT id FROM taxon WHERE spno = :spno)
                """, {'spno': spno})
        session.commit()

    db_proj = pyproj.Proj('+init=EPSG:4326')  # Database always uses WGS84
    working_proj = pyproj.Proj('+init=EPSG:3112')  # GDA94 / Geoscience Australia Lambert - so that we can buffer in metres

    # Load coastal shapefile
    coastal_shape_filename = tsx.config.config.get("processing.alpha_hull", "coastal_shp")
    with fiona.open(coastal_shape_filename, 'r') as coastal_shape:
        # Convert from fiona dictionary to shapely geometry and reproject
        coastal_shape = reproject(shape(coastal_shape[0]['geometry']), pyproj.Proj(coastal_shape.crs), working_proj)

    # Simplify coastal boundary - makes things run ~20X faster
    log.info("Simplifying coastal boundary")
    coastal_shape = coastal_shape.buffer(10000).simplify(10000)

    log.info("Generating alpha shapes")

    # Process a single species.
    # This gets run off the main thread.
    def process_spno(spno):
        session = get_session()
        try:
            # Get raw points from DB
            raw_points = get_species_points(session, spno)

            if len(raw_points) < 4:
                # Not enough points to create an alpha hull
                return

            # Read points from database
            points = [reproject(p, db_proj, working_proj) for p in raw_points]

            # Generate alpha shape
            alpha_shp = make_alpha_hull(
                points=points,
                coastal_shape=coastal_shape,
                thinning_distance=tsx.config.config.getfloat('processing.alpha_hull', 'thinning_distance'),
                alpha=tsx.config.config.getfloat('processing.alpha_hull', 'alpha'),
                hullbuffer_distance=tsx.config.config.getfloat('processing.alpha_hull', 'hullbuffer_distance'),
                isolatedbuffer_distance=tsx.config.config.getfloat('processing.alpha_hull', 'isolatedbuffer_distance'))

            # Convert back to DB projection
            alpha_shp = reproject(alpha_shp, working_proj, db_proj)

            # Clean up geometry
            alpha_shp = alpha_shp.buffer(0)

            # Get range polygons to intersect with alpha shape
            for taxon_id, range_id, breeding_range_id, geom_wkb in get_species_range_polygons(session, spno):
                # Intersect and insert into DB
                geom = shapely.wkb.loads(binascii.unhexlify(geom_wkb)).buffer(0)
                geom = to_multipolygon(geom.intersection(alpha_shp))  # slow
                if len(geom) > 0:
                    session.execute(
                        """INSERT INTO taxon_presence_alpha_hull (taxon_id, range_id, breeding_range_id, geometry)
                           VALUES (:taxon_id, :range_id, :breeding_range_id, ST_GeomFromWKB(_BINARY :geom_wkb))""",
                        {
                            'taxon_id': taxon_id,
                            'range_id': range_id,
                            'breeding_range_id': breeding_range_id,
                            'geom_wkb': shapely.wkb.dumps(geom)
                        })

                    # We also subdivide the geometries into small pieces and insert this into the database.
                    # This allows for much faster spatial queries in the database.
                    for subgeom in subdivide_geometry(geom, max_points=100):
                        session.execute(
                            """INSERT INTO taxon_presence_alpha_hull_subdiv (taxon_id, range_id, breeding_range_id, geometry)
                               VALUES (:taxon_id, :range_id, :breeding_range_id, ST_GeomFromWKB(_BINARY :geom_wkb))""",
                            {
                                'taxon_id': taxon_id,
                                'range_id': range_id,
                                'breeding_range_id': breeding_range_id,
                                'geom_wkb': shapely.wkb.dumps(subgeom)
                            })

            if commit:
                session.commit()
        except:
            log.exception("Exception processing alpha hull")
            raise
        finally:
            session.close()

    # This is important because we are about to spawn child processes, and this stops them attempting to share the
    # same database connection pool
    session.close()
    tsx.db.connect.engine.dispose()

    # Process all the species in parallel
    for result, error in tqdm(run_parallel(process_spno, species, use_processes=True), total=len(species)):
        if error:
            print(error)

def process(taxon_id, coastal_shape, data_type, commit):
    session = get_session()
    try:
        # Load core range geometry
        core_range_geom = reproject(
            get_core_range_geometry(session, taxon_id),
            to_working_transformer).buffer(0).intersection(coastal_shape)

        for source_id in get_source_ids(session, data_type, taxon_id):
            log.info("Processing taxon_id: %s, source_id: %s" % (taxon_id, source_id))

            # Get raw points from DB
            raw_points = get_raw_points(session, data_type, taxon_id, source_id)

            empty = len(raw_points) < 4
            if empty:
                log.info("Taxon %s: not enough points to create alpha hull (%s)" % (taxon_id, len(raw_points)))

            if not empty:
                # Read points from database
                points = [reproject(p, to_working_transformer) for p in raw_points]

                # Generate alpha shape
                alpha_shp = make_alpha_hull(
                    points=points,
                    coastal_shape=None,
                    thinning_distance=tsx.config.config.getfloat('processing.alpha_hull', 'thinning_distance'),
                    alpha=tsx.config.config.getfloat('processing.alpha_hull', 'alpha'),
                    hullbuffer_distance=tsx.config.config.getfloat('processing.alpha_hull', 'hullbuffer_distance'),
                    isolatedbuffer_distance=tsx.config.config.getfloat('processing.alpha_hull', 'isolatedbuffer_distance'))

                # Clean up geometry
                alpha_shp = alpha_shp.buffer(0)

                if core_range_geom.area == 0:
                    log.info("Core range geometry area is zero")
                    empty = True
                else:
                    # Intersect alpha hull with core range
                    intersected_alpha = to_multipolygon(core_range_geom.intersection(alpha_shp))
                    empty = intersected_alpha.is_empty

            if empty:
                session.execute(
                    """INSERT INTO taxon_source_alpha_hull (source_id, taxon_id, data_type, core_range_area_in_m2, alpha_hull_area_in_m2)
                       VALUES (:source_id, :taxon_id, :data_type, 0, 0)""",
                    {
                        'source_id': source_id,
                        'taxon_id': taxon_id,
                        'data_type': data_type
                    })
            else:
                session.execute(
                    """INSERT INTO taxon_source_alpha_hull (source_id, taxon_id, data_type, geometry, core_range_area_in_m2, alpha_hull_area_in_m2)
                       VALUES (:source_id, :taxon_id, :data_type, ST_GeomFromWKB(_BINARY :geom_wkb), :core_range_area, :alpha_hull_area)""",
                    {
                        'source_id': source_id,
                        'taxon_id': taxon_id,
                        'data_type': data_type,
                        'geom_wkb': shapely.wkb.dumps(reproject(intersected_alpha, to_db_transformer)),
                        'core_range_area': core_range_geom.area,
                        'alpha_hull_area': intersected_alpha.area
                    })

            if commit:
                session.commit()
    except:
        log.exception("Exception processing alpha hull")
        raise
    finally:
        session.close()

def process_taxon(taxon_id, commit):
    try:
        session = get_session()
        taxon = session.query(Taxon).get(taxon_id)

        for range_id, breeding_range_id, geom_wkb in get_taxon_range_polygons(session, taxon.id):
            if not taxon.ultrataxon:
                log.info("Skipping range polygon for non-ultrataxon: %s" % taxon.id)
                continue

            # tqdm.write("Taxon: %s, range: %s" % (taxon.id, range_id))

            cache = {}

            geom = shapely.wkb.loads(binascii.unhexlify(geom_wkb)).buffer(0)  # Ensure valid
            bounds_wkb = shapely.wkb.dumps(Polygon.from_bounds(*geom.bounds))

            # Get all sightings matching that taxon
            q = session.execute(
                """SELECT t2_sighting.id, ST_X(coords), ST_Y(coords)
                   FROM t2_sighting, t2_survey
                   WHERE t2_sighting.survey_id = t2_survey.id
                   AND t2_sighting.taxon_id = :taxon_id
                   AND t2_sighting.taxon_id NOT IN (SELECT taxon_id FROM t2_ultrataxon_sighting WHERE sighting_id = t2_sighting.id)
                   AND MBRContains(ST_GeomFromWKB(_BINARY :bounds_wkb), coords)
                """, {'taxon_id': taxon.id, 'bounds_wkb': bounds_wkb})

            records = []

            for sighting_id, x, y in q.fetchall():
                if point_intersects_geom(geom, x, y, cache):
                    records.append(T2UltrataxonSighting(
                        sighting_id=sighting_id,
                        taxon_id=taxon.id,
                        range_id=range_id,
                        generated_subspecies=False))

            if taxon.taxon_level.description == 'ssp':
                # Get parent taxa (species) sightings
                q = session.execute(
                    """SELECT t2_sighting.id, ST_X(coords), ST_Y(coords)
                       FROM t2_sighting, t2_survey, taxon taxon_ssp, taxon taxon_sp
                       WHERE t2_sighting.survey_id = t2_survey.id
                       AND t2_sighting.taxon_id = taxon_sp.id
                       AND taxon_ssp.id NOT IN (SELECT taxon_id FROM t2_ultrataxon_sighting WHERE sighting_id = t2_sighting.id)
                       AND taxon_sp.taxon_level_id = (SELECT id FROM taxon_level WHERE description = 'sp')
                       AND taxon_sp.spno = taxon_ssp.spno
                       AND taxon_ssp.id = :taxon_id
                       AND MBRContains(ST_GeomFromWKB(_BINARY :bounds_wkb), coords)
                    """, {'taxon_id': taxon.id, 'bounds_wkb': bounds_wkb})

                for sighting_id, x, y in q.fetchall():
                    if point_intersects_geom(geom, x, y, cache):
                        records.append(T2UltrataxonSighting(
                            sighting_id=sighting_id,
                            taxon_id=taxon.id,
                            range_id=range_id,
                            generated_subspecies=True))

            session.bulk_save_objects(records)

        if commit:
            session.commit()
    except Exception as e:
        log.exception('Exception in range and ultrataxon processing')
        raise
    finally:
        session.close()

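# point_intersects_geom is used above with a per-polygon cache dict. A hedged, hypothetical
# sketch of how such a cache might work, assuming it memoises a shapely prepared geometry so
# that thousands of point-in-polygon tests against the same range polygon stay fast (the
# actual helper may differ):
from shapely.geometry import Point
from shapely.prepared import prep

def point_intersects_geom_sketch(geom, x, y, cache):
    if 'prepared' not in cache:
        cache['prepared'] = prep(geom)
    return cache['prepared'].intersects(Point(x, y))
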
def build_filter_string():
    filters = []

    # spno
    if 'spno' in request.args:
        _sp_no = request.args.get('spno', type=int)
        filters.append("SpNo=='%d'" % (_sp_no))

    # datatype
    if 'datatype' in request.args:
        _sp_no = request.args.get('datatype', type=int)
        filters.append("DataType=='%d'" % (_sp_no))

    # state
    if 'state' in request.args:
        _stateList = request.args.get('state', type=str).split('+')
        filters.append("(%s)" % " or ".join(["State=='%s'" % s for s in _stateList]))

    # searchtypedesc
    if 'searchtype' in request.args:
        _search_type = request.args.get('searchtype', type=int)
        # find in database
        session = get_session()
        _search_type_desc = session.execute(
            """SELECT * FROM search_type WHERE id = :searchtypeid""",
            {'searchtypeid': _search_type}).fetchone()['description']
        filters.append("SearchTypeDesc=='%s'" % (_search_type_desc))
        session.close()

    # subibra
    if 'subibra' in request.args:
        _subibra = request.args.get('subibra', type=str)
        filters.append("SubIBRA=='%s'" % (_subibra))

    # sourceid
    if 'sourceid' in request.args:
        _sourceid = request.args.get('sourceid', type=int)
        filters.append("SourceID=='%d'" % (_sourceid))

    # Functional group
    if 'group' in request.args:
        _group = request.args.get('group', type=str)
        filters.append("FunctionalGroup=='%s'" % (_group))

    # functional subgroup
    if 'subgroup' in request.args:
        _subgroup = request.args.get('subgroup', type=str)
        filters.append("FunctionalSubGroup=='%s'" % (_subgroup))

    # status/statusauth (IUCN, EPBC, BirdLifeAustralia, Max)
    if 'status' in request.args and 'statusauth' in request.args:
        _statusList = request.args.get('status', type=str).split('+')
        _statusauth = request.args.get('statusauth', type=str)
        filters.append("(%s)" % " or ".join(["%sStatus=='%s'" % (_statusauth, s) for s in _statusList]))

    # national priority
    if 'priority' in request.args:
        filters.append("NationalPriorityTaxa=='%d'" % (request.args.get('priority', type=int)))

    if len(filters) > 0:
        return " and ".join(filters)
    else:
        return None

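# The strings built above use pandas query syntax (e.g. "SpNo=='123' and State=='QLD'").
# A minimal usage sketch, assuming the caller applies the result to the wide data frame
# along these lines (apply_filters_sketch is illustrative, not an existing function):
def apply_filters_sketch(df):
    filter_str = build_filter_string()
    return df.query(filter_str) if filter_str else df
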
def get_stats(filtered_data):
    df = filtered_data

    years = [col for col in df.columns if col.isdigit()]
    int_years = [int(year) for year in years]
    year_df = df.loc[:, years] * 0 + int_years

    # Time series length
    ts_length = df['TimeSeriesLength']  # year_df.max(axis = 1) - year_df.min(axis = 1) + 1
    # Time series sample years
    ts_years = df['TimeSeriesSampleYears']  # (year_df * 0 + 1).sum(axis = 1)

    n_sources = df['SourceDesc'].nunique()
    n_taxa = df['TaxonID'].nunique()

    grouped_by_taxon = df.groupby('TaxonID').agg({
        'TimeSeriesLength': np.mean,
        'SourceDesc': lambda x: x.nunique(),
        'TimeSeriesID': lambda x: x.nunique(),
        'SpatialRepresentativeness': np.mean
    }).rename(columns={
        'TimeSeriesLength': 'ts_length_mean',
        'SourceDesc': 'num_sources',
        'TimeSeriesID': 'num_ts',
        'SpatialRepresentativeness': 'spatial_rep'
    })

    session = get_session()
    result = session.execute(
        """SELECT id AS 'TaxonID', common_name, scientific_name, bird_group, bird_sub_group,
            (SELECT description FROM taxon_status WHERE taxon_status.id = aust_status_id) AS aust_status,
            (SELECT description FROM taxon_status WHERE taxon_status.id = epbc_status_id) AS epbc_status
            FROM taxon
            WHERE GREATEST(COALESCE(aust_status_id, 0), COALESCE(epbc_status_id, 0), COALESCE(iucn_status_id, 0)) NOT IN (0,1,7)
            AND (ultrataxon OR taxon.id IN :taxon_ids)""",
        {'taxon_ids': list(df['TaxonID'].unique())})
    session.close()

    all_taxa = pd.DataFrame.from_records(data=result.fetchall(), index='TaxonID', columns=result.keys())

    joined = all_taxa.join(grouped_by_taxon, how='outer').sort_values(
        ['bird_group', 'bird_sub_group', 'common_name'], na_position='first')
    joined = joined.reset_index().rename(columns={'TaxonID': 'taxon_id'})
    joined['spatial_rep'] *= 100

    taxa_with_data = joined.query('num_ts > 0')
    taxa_without_data = joined.query('num_ts != num_ts').drop(
        columns=['ts_length_mean', 'num_sources', 'num_ts', 'spatial_rep'])

    return {
        'num_sources': df['SourceDesc'].nunique(),
        'num_taxa': df['TaxonID'].nunique(),
        'ts_length': {'mean': ts_length.mean(), 'stddev': ts_length.std()},
        'ts_years': {'mean': ts_years.mean(), 'stddev': ts_years.std()},
        'taxa_with_data': taxa_with_data.to_dict(orient='records'),
        'taxa_without_data': taxa_without_data.to_dict(orient='records')
    }