import json

from rtree.index import Index
# asShape was removed in Shapely 2.0; shape is the equivalent constructor
from shapely.geometry import shape as asShape


class StreetIndex(object):

    def __init__(self, streets_file):
        # Index street segment endpoints: positive ids mark segment starts,
        # negative ids mark segment ends.
        self.idx = Index()
        with open(streets_file) as f:
            for line in f:
                street = json.loads(line)
                street_id = street['properties']['id']
                street_shape = asShape(street['geometry'])
                for i in range(len(street_shape.geoms)):
                    seg_id = self.encode_seg_id(i, street_id)
                    self.idx.insert(seg_id, street_shape.geoms[i].coords[0])
                    self.idx.insert(-seg_id, street_shape.geoms[i].coords[-1])

        # Second index over whole-street bounding boxes for nearest queries.
        self.bb_idx = Index()
        with open(streets_file) as f:
            for line in f:
                street = json.loads(line)
                street_id = int(street['properties']['id'])
                street_shape = asShape(street['geometry'])
                self.bb_idx.insert(street_id, list(street_shape.bounds))

    def encode_seg_id(self, i, street_id):
        # Pack the segment position and street id into a single integer.
        return i * 1000000 + int(street_id)

    def decode_seg_id(self, seg_id):
        # Invert encode_seg_id: strip the segment position to recover the
        # street id (integer division, not true division).
        i = abs(seg_id) // 1000000
        return abs(seg_id) - i * 1000000

    def find_nearest_street(self, shape):
        shape = asShape(shape['geometry'])
        shape_type = shape.geom_type
        if shape_type == 'Polygon' or shape_type == 'MultiPolygon':
            ref_point = (
                float(shape.centroid.coords.xy[0][0]),
                float(shape.centroid.coords.xy[1][0])
            )
        else:
            ref_point = (
                float(shape.coords.xy[0][0]),
                float(shape.coords.xy[1][0])
            )
        street_id = list(self.bb_idx.nearest(ref_point))[0]
        return str(street_id)

    def find_connected_street(self, street):
        street_id = int(street['properties']['id'])
        street_shape = asShape(street['geometry'])
        street_start = street_shape.geoms[0].coords[0]
        street_end = street_shape.geoms[-1].coords[-1]
        seg_ids = list(self.idx.intersection(street_start))
        seg_ids += list(self.idx.intersection(street_end))
        street_ids = set(map(self.decode_seg_id, seg_ids))
        street_ids.discard(street_id)
        return street_ids
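
# Usage sketch for StreetIndex with made-up data; the file name, ids, and
# coordinates below are illustrative only. Each input line is assumed to be
# one GeoJSON feature with a MultiLineString geometry and an 'id' property.
street = {
    'properties': {'id': 42},
    'geometry': {
        'type': 'MultiLineString',
        'coordinates': [[[-122.40, 37.70], [-122.41, 37.71]]],
    },
}
with open('streets.jsonl', 'w') as f:
    f.write(json.dumps(street) + '\n')

index = StreetIndex('streets.jsonl')
point = {'geometry': {'type': 'Point', 'coordinates': (-122.405, 37.705)}}
print(index.find_nearest_street(point))      # -> '42'
print(index.find_connected_street(street))   # -> set() (no other streets)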
from rtree.index import Index as RTreeIndex
from shapely.geometry import Point


def snap_to_edge_position(gdf, points, k=3, rtree=None):
    """
    Snap given points in the plane to edges in a GeoDataFrame of edges.

    Parameters
    ----------
    gdf : GeoDataFrame
        The edges of the spatial network as a GeoDataFrame.
    points : array of floats, shape (M, 2)
        The cartesian coordinates of the points to be snapped.
    k : integer, optional
        Number of nearest edges to consider.
    rtree : rtree.index.Index, optional
        Prebuilt spatial index over `gdf`; built on the fly if not passed.

    Returns
    -------
    nearest_edges : list of integers, length M
        Indices of nearest edges in the GeoDataFrame.
    refdistances : list of floats, length M
        Linear referencing distances of points along their nearest edge.
    """
    X, Y = points.T
    geom = gdf["geometry"]

    # if not passed, build the r-tree spatial index by position for subsequent iloc
    if rtree is None:
        rtree = RTreeIndex()
        for pos, bounds in enumerate(geom.bounds.values):
            rtree.insert(pos, bounds)

    # use r-tree to find possible nearest neighbors, one point at a time,
    # then minimize euclidean distance from point to the possible matches
    nearest_edges = list()
    refdistances = list()
    for xy in zip(X, Y):
        p = Point(xy)
        dists = geom.iloc[list(rtree.nearest(xy, num_results=k))].distance(p)
        ne = geom[dists.idxmin()]
        nearest_edges.append(dists.idxmin())
        refdistances.append(ne.project(p))

    return nearest_edges, refdistances
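
# Minimal usage sketch with hypothetical data: snap two points onto a
# two-edge network and read back the linear-referencing offsets.
import geopandas as gpd
import numpy as np
from shapely.geometry import LineString

edges = gpd.GeoDataFrame(geometry=[
    LineString([(0, 0), (10, 0)]),
    LineString([(0, 5), (10, 5)]),
])
pts = np.array([[2.0, 1.0], [7.0, 4.0]])

nearest, offsets = snap_to_edge_position(edges, pts, k=2)
print(nearest)  # [0, 1]: index labels of the nearest edges
print(offsets)  # [2.0, 7.0]: distance along each matched edge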
import numpy as np
import pandas as pd
from rtree.index import Index as RTreeIndex
from shapely.geometry import Point

# this function lives in OSMnx's distance module, alongside these
# module-level imports and optional dependencies
from . import projection, utils_geo, utils_graph

try:
    from scipy.spatial import cKDTree
except ImportError:  # pragma: no cover
    cKDTree = None

try:
    from sklearn.neighbors import BallTree
except ImportError:  # pragma: no cover
    BallTree = None

EARTH_RADIUS_M = 6_371_009  # mean earth radius in meters


def nearest_edges(G, X, Y, interpolate=None, return_dist=False):
    """
    Find the nearest edge to a point or to each of several points.

    If `X` and `Y` are single coordinate values, this will return the
    nearest edge to that point. If `X` and `Y` are lists of coordinate
    values, this will return the nearest edge to each point.

    If `interpolate` is None, search for the nearest edge to each point, one
    at a time, using an r-tree and minimizing the euclidean distances from
    the point to the possible matches. For accuracy, use a projected graph
    and points. This method is precise and also fastest if searching for few
    points relative to the graph's size.

    For a faster method if searching for many points relative to the graph's
    size, use the `interpolate` argument to interpolate points along the
    edges and index them. If the graph is projected, this uses a k-d tree
    for euclidean nearest neighbor search, which requires that scipy is
    installed as an optional dependency. If the graph is unprojected, this
    uses a ball tree for haversine nearest neighbor search, which requires
    that scikit-learn is installed as an optional dependency.

    Parameters
    ----------
    G : networkx.MultiDiGraph
        graph in which to find nearest edges
    X : float or list
        points' x (longitude) coordinates, in same CRS/units as graph and
        containing no nulls
    Y : float or list
        points' y (latitude) coordinates, in same CRS/units as graph and
        containing no nulls
    interpolate : float
        spacing distance between interpolated points, in same units as
        graph. smaller values generate more points.
    return_dist : bool
        optionally also return distance between points and nearest edges

    Returns
    -------
    ne or (ne, dist) : tuple or list
        nearest edges as (u, v, key) or optionally a tuple where `dist`
        contains distances between the points and their nearest edges
    """
    is_scalar = False
    if not (hasattr(X, "__iter__") and hasattr(Y, "__iter__")):
        # make coordinates arrays if user passed non-iterable values
        is_scalar = True
        X = np.array([X])
        Y = np.array([Y])

    if np.isnan(X).any() or np.isnan(Y).any():  # pragma: no cover
        raise ValueError("`X` and `Y` cannot contain nulls")
    geoms = utils_graph.graph_to_gdfs(G, nodes=False)["geometry"]

    # if no interpolation distance was provided
    if interpolate is None:
        # build the r-tree spatial index by position for subsequent iloc
        rtree = RTreeIndex()
        for pos, bounds in enumerate(geoms.bounds.values):
            rtree.insert(pos, bounds)

        # use r-tree to find possible nearest neighbors, one point at a time,
        # then minimize euclidean distance from point to the possible matches
        ne_dist = list()
        for xy in zip(X, Y):
            dists = geoms.iloc[list(rtree.nearest(xy))].distance(Point(xy))
            ne_dist.append((dists.idxmin(), dists.min()))
        ne, dist = zip(*ne_dist)

    # otherwise, if interpolation distance was provided
    else:
        # interpolate points along edges to index with k-d tree or ball tree
        uvk_xy = list()
        for uvk, geom in zip(geoms.index, geoms.values):
            uvk_xy.extend((uvk, xy) for xy in utils_geo.interpolate_points(geom, interpolate))
        labels, xy = zip(*uvk_xy)
        vertices = pd.DataFrame(xy, index=labels, columns=["x", "y"])

        if projection.is_projected(G.graph["crs"]):
            # if projected, use k-d tree for euclidean nearest-neighbor search
            if cKDTree is None:  # pragma: no cover
                raise ImportError("scipy must be installed to search a projected graph")
            dist, pos = cKDTree(vertices).query(np.array([X, Y]).T, k=1)
            ne = vertices.index[pos]

        else:
            # if unprojected, use ball tree for haversine nearest-neighbor search
            if BallTree is None:  # pragma: no cover
                raise ImportError("scikit-learn must be installed to search an unprojected graph")

            # haversine requires lat, lng coords in radians
            vertices_rad = np.deg2rad(vertices[["y", "x"]])
            points_rad = np.deg2rad(np.array([Y, X]).T)
            dist, pos = BallTree(vertices_rad, metric="haversine").query(points_rad, k=1)
            dist = dist[:, 0] * EARTH_RADIUS_M  # convert radians -> meters
            ne = vertices.index[pos[:, 0]]

    # convert results to correct types for return
    ne = list(ne)
    dist = list(dist)
    if is_scalar:
        ne = ne[0]
        dist = dist[0]

    if return_dist:
        return ne, dist
    else:
        return ne
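
# Usage sketch: this matches osmnx.distance.nearest_edges in OSMnx 1.x, so
# it can be exercised through the public API (requires osmnx installed and an
# internet connection; the point below is illustrative).
import osmnx as ox

G = ox.graph_from_point((37.79, -122.41), dist=500, network_type="drive")
edge = ox.distance.nearest_edges(G, X=-122.41, Y=37.79)
print(edge)  # -> (u, v, key) of the nearest edge to the query point

# with interpolation: faster for many query points, at some loss of
# precision; an unprojected graph takes the ball-tree path (needs sklearn)
edges = ox.distance.nearest_edges(G, X=[-122.41, -122.40], Y=[37.79, 37.79],
                                  interpolate=10)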
from typing import Callable, List

from netCDF4 import Dataset
from pyproj import Transformer
from rtree.index import Index

# SqliteUtil, Point, counter, load_links, load_parcels, create_tables,
# create_indexes, iterpolation, xy, and log are project-level helpers
# assumed to be importable from elsewhere in this package.


def parse_temperatures(database: SqliteUtil, tmin_files: List[str],
        tmax_files: List[str], steps: int, day: int, src_epsg: int,
        prj_epsg: int):
    log.info('Allocating tables for air temperatures.')
    create_tables(database)

    files = zip(tmax_files, tmin_files)
    profile_count = 0
    point_count = 0
    temperatures = []
    points = []
    profiles = {}
    n = 1

    # note: skip_equivalent was deprecated and later removed in newer pyproj
    transformer = Transformer.from_crs(f'epsg:{src_epsg}',
        f'epsg:{prj_epsg}', always_xy=True, skip_equivalent=True)
    project = transformer.transform

    def apply(id: int, temp: Callable):
        # expand a temperature profile callable into per-step rows
        for step in range(steps):
            prop = step / steps
            row = (id, step, int(86400 * prop), temp(24 * prop))
            yield row

    log.info('Loading temperatures from netCDF4 files.')
    for tmax_file, tmin_file in files:
        tmaxnc = Dataset(tmax_file, 'r')
        tminnc = Dataset(tmin_file, 'r')

        lons = tmaxnc.variables['lon']
        lats = tmaxnc.variables['lat']
        shape = tmaxnc.variables['tmax'].shape

        tmaxs = tmaxnc.variables['tmax'][day]
        tmins = tminnc.variables['tmin'][day]

        for i in range(shape[1]):
            for j in range(shape[2]):
                tmax = tmaxs[i][j]
                tmin = tmins[i][j]

                if tmax != -9999.0:
                    x, y = project(lons[i][j], lats[i][j])
                    idx = f'{tmax}-{tmin}'

                    if idx not in profiles:
                        temp = iterpolation(tmin, tmax, 5, 15)
                        temperatures.extend(apply(profile_count, temp))
                        profiles[idx] = profile_count
                        profile_count += 1

                    profile = profiles[idx]
                    point = Point(point_count, x, y, profile)
                    points.append(point)
                    point_count += 1

                    if point_count == n:
                        log.info(f'Loading air temperature reading {point_count}.')
                        n <<= 1

        tmaxnc.close()
        tminnc.close()

    if point_count != n >> 1:
        log.info(f'Loading air temperature reading {point_count}.')

    def load():
        # stream (id, bounds, obj) triples so the r-tree can be bulk-loaded
        for point in points:
            x, y = point.x, point.y
            yield (point.id, (x, y, x, y), point.profile)

    log.info('Starting network update for air temperatures.')
    log.info('Building spatial index from temperature profile locations.')
    index = Index(load())
    used = set()

    log.info('Loading network links.')
    links = load_links(database)

    log.info('Applying temperature profiles to links.')
    iter_links = counter(links, 'Applying profile to link %s.')
    for link in iter_links:
        result = index.nearest((link.x, link.y, link.x, link.y), objects=True)
        profile = next(result).object
        link.air_temperature = profile
        used.add(profile)

    def dump_links():
        for link in links:
            yield (link.id, link.air_temperature)

    log.info('Writing updated links to database.')
    database.insert_values('temp_links', dump_links(), 2)
    database.connection.commit()
    del links

    log.info('Loading network parcels.')
    parcels = load_parcels(database)

    # constant indoor profiles for cooled parcels (26.6667 degC == 80 degF)
    residential = profile_count
    temperatures.extend(apply(profile_count, lambda x: 26.6667))
    profile_count += 1
    commercial = profile_count
    temperatures.extend(apply(profile_count, lambda x: 26.6667))
    profile_count += 1
    other = profile_count
    temperatures.extend(apply(profile_count, lambda x: 26.6667))
    profile_count += 1
    used.add(residential)
    used.add(commercial)
    used.add(other)

    log.info('Applying temperature profiles to parcels.')
    iter_parcels = counter(parcels, 'Applying profile to parcel %s.')
    for parcel in iter_parcels:
        if not parcel.cooling:
            x, y = xy(parcel.center)
            result = index.nearest((x, y, x, y), objects=True)
            profile = next(result).object
            parcel.air_temperature = profile
            used.add(profile)
        elif parcel.kind == 'residential':
            parcel.air_temperature = residential
        elif parcel.kind == 'commercial':
            parcel.air_temperature = commercial
        else:
            parcel.air_temperature = other

    def dump_parcels():
        for parcel in parcels:
            yield (parcel.apn, parcel.air_temperature)

    log.info('Writing updated parcels to database.')
    database.insert_values('temp_parcels', dump_parcels(), 2)
    database.connection.commit()
    del parcels

    def dump_temperatures():
        for temp in temperatures:
            if temp[0] in used:
                yield temp

    log.info('Writing parsed air temperatures to database.')
    database.insert_values('air_temperatures', dump_temperatures(), 4)
    database.connection.commit()
    del temperatures

    log.info('Merging, dropping and renaming old tables.')

    query = '''
        CREATE INDEX temp_links_link
        ON temp_links(link_id);
    '''
    database.cursor.execute(query)
    query = '''
        CREATE TABLE temp_links_merged
        AS SELECT
            links.link_id,
            links.source_node,
            links.terminal_node,
            links.length,
            links.freespeed,
            links.capacity,
            links.permlanes,
            links.oneway,
            links.modes,
            temp_links.air_temperature,
            links.mrt_temperature
        FROM links
        INNER JOIN temp_links
        USING(link_id);
    '''
    database.cursor.execute(query)
    query = '''
        CREATE INDEX temp_parcels_parcel
        ON temp_parcels(apn);
    '''
    database.cursor.execute(query)
    query = '''
        CREATE TABLE temp_parcels_merged
        AS SELECT
            parcels.apn,
            parcels.maz,
            parcels.type,
            parcels.cooling,
            temp_parcels.air_temperature,
            parcels.mrt_temperature,
            parcels.center,
            parcels.region
        FROM parcels
        INNER JOIN temp_parcels
        USING(apn);
    '''
    database.cursor.execute(query)

    original = database.count_rows('links')
    merged = database.count_rows('temp_links_merged')
    if original != merged:
        log.error('Original links and updated links tables '
            'do not align; quitting to prevent data loss.')
        raise RuntimeError
    else:
        database.drop_table('links', 'temp_links')
        query = '''
            ALTER TABLE temp_links_merged
            RENAME TO links;
        '''
        database.cursor.execute(query)

    original = database.count_rows('parcels')
    merged = database.count_rows('temp_parcels_merged')
    if original != merged:
        log.error('Original parcels and updated parcels tables '
            'do not align; quitting to prevent data loss.')
        raise RuntimeError
    else:
        database.drop_table('parcels', 'temp_parcels')
        query = '''
            ALTER TABLE temp_parcels_merged
            RENAME TO parcels;
        '''
        database.cursor.execute(query)

    database.connection.commit()

    log.info('Creating indexes on new tables.')
    create_indexes(database)

    log.info('Writing process metadata.')
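
# Standalone sketch of the bulk-load and nearest-lookup pattern used above,
# with made-up coordinates and profile ids (no project helpers required).
from rtree.index import Index

def stream():
    # (id, (minx, miny, maxx, maxy), obj) triples; degenerate boxes are points
    yield (0, (0.0, 0.0, 0.0, 0.0), 'profile-a')
    yield (1, (5.0, 5.0, 5.0, 5.0), 'profile-b')

idx = Index(stream())
match = next(idx.nearest((4.0, 4.0, 4.0, 4.0), objects=True))
print(match.object)  # -> 'profile-b'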
import numpy
import pandas
from sklearn.utils import check_array

# to_elevation, _resolve_metric, and _passthrough are helpers defined
# elsewhere in the topo module this function belongs to

try:
    from tqdm import tqdm
    HAS_TQDM = True
except ImportError:
    HAS_TQDM = False


def isolation(
    X,
    coordinates,
    metric="euclidean",
    middle="mean",
    return_all=False,
    progressbar=False,
):
    """
    Compute the isolation of each value of X by constructing the distance
    to the nearest higher value in the data.

    Parameters
    ----------
    X : numpy.ndarray
        (N, p) array of data to use as input. If p > 1, the "elevation" is
        computed using the topo.to_elevation function.
    coordinates : numpy.ndarray
        (N, k) array of locations for X to compute distances. If
        metric='precomputed', this should contain the distances from each
        point to every other point, and k == N.
    metric : string or callable (default: 'euclidean')
        name of distance metric in scipy.spatial.distance, or function, that
        can be used to compute distances between locations. If 'precomputed',
        an ad-hoc function will be defined to look up distances between
        points instead.
    middle : string or callable (default: 'mean')
        method to define the elevation of points. See to_elevation for more
        details.
    return_all : bool (default: False)
        if False, only return the isolation (distance to nearest higher
        value).
    progressbar : bool (default: False)
        if True, show a progressbar for the computation.

    Returns
    -------
    either (N,) array of isolation values, or a pandas dataframe containing
    the full tree of precedence for the isolation tree.
    """
    X = check_array(X, ensure_2d=False)
    X = to_elevation(X, middle=middle).squeeze()
    try:
        from rtree.index import Index as SpatialIndex
    except ImportError:
        raise ImportError(
            "rtree library must be installed to use the isolation measure"
        )
    distance_func = _resolve_metric(X, coordinates, metric)

    # process points from highest to lowest value; each point's nearest
    # already-indexed point is therefore its nearest higher value
    sort_order = numpy.argsort(-X)

    tree = SpatialIndex()
    ix = sort_order[0]
    tree.insert(0, tuple(coordinates[ix]), obj=X[ix])
    precedence_tree = [[ix, numpy.nan, 0, numpy.nan, numpy.nan, numpy.nan]]

    if progressbar and HAS_TQDM:
        pbar = tqdm
    elif progressbar and (not HAS_TQDM):
        raise ImportError("the tqdm module is required for progressbars")
    else:
        pbar = _passthrough

    for iter_ix, ix in pbar(enumerate(sort_order[1:])):
        rank = iter_ix + 1
        value = X[ix]
        location = coordinates[ix]
        (match,) = tree.nearest(tuple(location), objects=True)
        higher_rank = match.id
        higher_value = match.object
        higher_location = match.bbox[:2]
        higher_ix = sort_order[higher_rank]
        distance = distance_func(location, higher_location)
        gap = higher_value - value
        precedence_tree.append([ix, higher_ix, rank, higher_rank, distance, gap])
        tree.insert(rank, tuple(location), obj=value)

    # re-order rows from processing (rank) order back to input order
    precedence_tree = numpy.asarray(precedence_tree)
    out = numpy.empty_like(precedence_tree)
    out[sort_order] = precedence_tree
    result = pandas.DataFrame(
        out,
        columns=["index", "parent_index", "rank", "parent_rank", "isolation", "gap"],
    ).sort_values(["index", "parent_index"])
    if return_all:
        return result
    return result.isolation.values
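
# Usage sketch with hypothetical data, assuming the module-level helpers
# (to_elevation, _resolve_metric, _passthrough) are available alongside this
# function, as in the topo module it comes from.
values = numpy.array([3.0, 9.0, 1.0, 7.0, 5.0])
coords = numpy.array([[0.0, 0.0], [1.0, 0.0], [2.0, 0.0], [0.0, 1.0], [1.0, 1.0]])

iso = isolation(values, coords)
print(iso)  # per-point distance to the nearest higher-valued observation;
            # the global maximum has no higher neighbor, so its entry is NaN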