Example #1
# Imports assumed by this snippet (not shown in the original listing).
import json

from rtree.index import Index
from shapely.geometry import asShape


class StreetIndex(object):
    def __init__(self, streets_file):
        self.idx = Index()
        # first pass: index the start and end points of every street segment
        with open(streets_file) as f:
            for line in f:
                street = json.loads(line)
                street_id = street['properties']['id']
                street_shape = asShape(street['geometry'])
                for i, segment in enumerate(street_shape.geoms):
                    seg_id = self.encode_seg_id(i, street_id)
                    # index both endpoints, using the id's sign to tell
                    # segment starts from segment ends
                    self.idx.insert(seg_id, segment.coords[0])
                    self.idx.insert(-seg_id, segment.coords[-1])

        # second pass: index each street's bounding box by street id
        self.bb_idx = Index()
        with open(streets_file) as f:
            for line in f:
                street = json.loads(line)
                street_id = int(street['properties']['id'])
                street_shape = asShape(street['geometry'])
                self.bb_idx.insert(street_id, list(street_shape.bounds))

    def encode_seg_id(self, i, street_id):
        # pack the segment index into the digits above the street id
        return i * 1000000 + int(street_id)

    def decode_seg_id(self, seg_id):
        # recover the street id stored in the low six digits
        return abs(seg_id) % 1000000

    def find_nearest_street(self, shape):
        shape = asShape(shape['geometry'])
        shape_type = shape.geom_type
        if shape_type == 'Polygon' or shape_type == 'MultiPolygon':
            ref_point = (
                float(shape.centroid.coords.xy[0][0]),
                float(shape.centroid.coords.xy[1][0])
            )
        else:
            ref_point = (
                float(shape.coords.xy[0][0]),
                float(shape.coords.xy[1][0])
            )
        street_id = list(self.bb_idx.nearest(ref_point))[0]
        return str(street_id)

    def find_connected_street(self, street):
        street_id = int(street['properties']['id'])
        street_shape = asShape(street['geometry'])
        street_start = street_shape.geoms[0].coords[0]
        street_end = street_shape.geoms[-1].coords[-1]
        seg_ids = list(self.idx.intersection(street_start))
        seg_ids += list(self.idx.intersection(street_end))
        street_ids = set(map(self.decode_seg_id, seg_ids))
        if street_id in street_ids:
            street_ids.remove(street_id)
        return street_ids
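
A minimal usage sketch (the file name and feature below are hypothetical; the input is assumed to be newline-delimited GeoJSON features with MultiLineString geometries and a 'properties.id' field):

index = StreetIndex('streets.jsonl')  # hypothetical input file
point = {'geometry': {'type': 'Point', 'coordinates': [-122.41, 37.77]}}
print(index.find_nearest_street(point))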
Example #2
# Imports assumed by this snippet.
from rtree.index import Index as RTreeIndex
from shapely.geometry import Point


def snap_to_edge_position(gdf, points, k=3, rtree=None):
    """
    Snap given points in the plane to edges in GeoDataFrame of edges.

    Parameters
    ----------
    gdf : GeoDataFrame
        The edges of the spatial network as a GeoDataFrame.
    points : array of floats, shape (M, 2)
        The Cartesian coordinates of the points to be snapped.
    k : integer, optional
        Number of nearest edges to consider.
    rtree : rtree.index.Index, optional
        Prebuilt spatial index of the edge bounds; built here if not passed.

    Returns
    -------
    nearest_edges : list of integers, length M
        Indices of nearest edges in the GeoDataframe.
    refdistances : list of floats, length M
        Linear referencing distances of points along nearest edge.
    """
    X, Y = points.T
    geom = gdf["geometry"]

    # If not passed, build the r-tree spatial index by position for subsequent iloc
    if rtree is None:
        rtree = RTreeIndex()
        for pos, bounds in enumerate(geom.bounds.values):
            rtree.insert(pos, bounds)

    # use r-tree to find possible nearest neighbors, one point at a time,
    # then minimize euclidean distance from point to the possible matches
    nearest_edges = list()
    refdistances = list()
    for xy in zip(X, Y):
        p = Point(xy)
        # distances from the point to each of the k candidate edges
        dists = geom.iloc[list(rtree.nearest(xy, num_results=k))].distance(p)
        nearest = dists.idxmin()
        nearest_edges.append(nearest)
        # linear-referencing distance of the snapped point along that edge
        refdistances.append(geom[nearest].project(p))

    return nearest_edges, refdistances
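
A short usage sketch (the edge geometries and points below are made up):

import geopandas as gpd
import numpy as np
from shapely.geometry import LineString

edges = gpd.GeoDataFrame(geometry=[LineString([(0, 0), (1, 0)]),
                                   LineString([(0, 1), (1, 1)])])
pts = np.array([[0.5, 0.1], [0.9, 0.8]])
edge_ids, offsets = snap_to_edge_position(edges, pts, k=2)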
Example #3
# Module-level context assumed by this snippet (mirroring osmnx's distance
# module, where this function lives).
import numpy as np
import pandas as pd
from rtree.index import Index as RTreeIndex
from shapely.geometry import Point

from osmnx import projection, utils_geo, utils_graph

EARTH_RADIUS_M = 6_371_009  # mean earth radius in meters, as used by osmnx

# scipy and scikit-learn are optional dependencies
try:
    from scipy.spatial import cKDTree
except ImportError:  # pragma: no cover
    cKDTree = None
try:
    from sklearn.neighbors import BallTree
except ImportError:  # pragma: no cover
    BallTree = None


def nearest_edges(G, X, Y, interpolate=None, return_dist=False):
    """
    Find the nearest edge to a point or to each of several points.

    If `X` and `Y` are single coordinate values, this will return the nearest
    edge to that point. If `X` and `Y` are lists of coordinate values, this
    will return the nearest edge to each point.

    If `interpolate` is None, search for the nearest edge to each point, one
    at a time, using an r-tree and minimizing the euclidean distances from the
    point to the possible matches. For accuracy, use a projected graph and
    points. This method is precise and also fastest if searching for few
    points relative to the graph's size.

    For a faster method if searching for many points relative to the graph's
    size, use the `interpolate` argument to interpolate points along the edges
    and index them. If the graph is projected, this uses a k-d tree for
    euclidean nearest neighbor search, which requires that scipy is installed
    as an optional dependency. If the graph is unprojected, this uses a ball tree
    for haversine nearest neighbor search, which requires that scikit-learn is
    installed as an optional dependency.

    Parameters
    ----------
    G : networkx.MultiDiGraph
        graph in which to find nearest edges
    X : float or list
        points' x (longitude) coordinates, in same CRS/units as graph and
        containing no nulls
    Y : float or list
        points' y (latitude) coordinates, in same CRS/units as graph and
        containing no nulls
    interpolate : float
        spacing distance between interpolated points, in same units as graph.
        smaller values generate more points.
    return_dist : bool
        optionally also return distance between points and nearest edges

    Returns
    -------
    ne or (ne, dist) : tuple or list
        nearest edges as (u, v, key) or optionally a tuple where `dist`
        contains distances between the points and their nearest edges
    """
    is_scalar = False
    if not (hasattr(X, "__iter__") and hasattr(Y, "__iter__")):
        # make coordinates arrays if user passed non-iterable values
        is_scalar = True
        X = np.array([X])
        Y = np.array([Y])

    if np.isnan(X).any() or np.isnan(Y).any():  # pragma: no cover
        raise ValueError("`X` and `Y` cannot contain nulls")
    geoms = utils_graph.graph_to_gdfs(G, nodes=False)["geometry"]

    # if no interpolation distance was provided
    if interpolate is None:

        # build the r-tree spatial index by position for subsequent iloc
        rtree = RTreeIndex()
        for pos, bounds in enumerate(geoms.bounds.values):
            rtree.insert(pos, bounds)

        # use r-tree to find possible nearest neighbors, one point at a time,
        # then minimize euclidean distance from point to the possible matches
        ne_dist = list()
        for xy in zip(X, Y):
            dists = geoms.iloc[list(rtree.nearest(xy))].distance(Point(xy))
            ne_dist.append((dists.idxmin(), dists.min()))
        ne, dist = zip(*ne_dist)

    # otherwise, if interpolation distance was provided
    else:

        # interpolate points along edges to index with k-d tree or ball tree
        uvk_xy = list()
        for uvk, geom in zip(geoms.index, geoms.values):
            uvk_xy.extend(
                (uvk, xy)
                for xy in utils_geo.interpolate_points(geom, interpolate))
        labels, xy = zip(*uvk_xy)
        vertices = pd.DataFrame(xy, index=labels, columns=["x", "y"])

        if projection.is_projected(G.graph["crs"]):
            # if projected, use k-d tree for euclidean nearest-neighbor search
            if cKDTree is None:  # pragma: no cover
                raise ImportError(
                    "scipy must be installed to search a projected graph")
            dist, pos = cKDTree(vertices).query(np.array([X, Y]).T, k=1)
            ne = vertices.index[pos]

        else:
            # if unprojected, use ball tree for haversine nearest-neighbor search
            if BallTree is None:  # pragma: no cover
                raise ImportError(
                    "scikit-learn must be installed to search an unprojected graph"
                )
            # haversine requires lat, lng coords in radians
            vertices_rad = np.deg2rad(vertices[["y", "x"]])
            points_rad = np.deg2rad(np.array([Y, X]).T)
            dist, pos = BallTree(vertices_rad,
                                 metric="haversine").query(points_rad, k=1)
            dist = dist[:, 0] * EARTH_RADIUS_M  # convert radians -> meters
            ne = vertices.index[pos[:, 0]]

    # convert results to correct types for return
    ne = list(ne)
    dist = list(dist)
    if is_scalar:
        ne = ne[0]
        dist = dist[0]

    if return_dist:
        return ne, dist
    else:
        return ne
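
A hedged usage sketch via osmnx (the place name and projected coordinates are placeholders):

import osmnx as ox

G = ox.graph_from_place('Piedmont, California, USA', network_type='drive')
Gp = ox.project_graph(G)
u, v, key = nearest_edges(Gp, 565000.0, 4185000.0)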
Example #4
# Imports assumed by this snippet. SqliteUtil, create_tables, iterpolation,
# Point, counter, load_links, load_parcels, xy, create_indexes, and log are
# helpers from the surrounding project and are not shown here.
from typing import Callable, List

from netCDF4 import Dataset
from pyproj import Transformer
from rtree.index import Index


def parse_temperatures(database: SqliteUtil, tmin_files: List[str],
                       tmax_files: List[str], steps: int, day: int,
                       src_epsg: int, prj_epsg: int):

    log.info('Allocating tables for air temperatures.')
    create_tables(database)

    files = zip(tmax_files, tmin_files)
    profile_count = 0
    point_count = 0
    temperatures = []
    points = []
    profiles = {}
    n = 1

    transformer = Transformer.from_crs(f'epsg:{src_epsg}',
                                       f'epsg:{prj_epsg}',
                                       always_xy=True,
                                       skip_equivalent=True)
    project = transformer.transform

    # expand a temperature profile into one (id, step, second-of-day,
    # temperature) row per simulation step
    def apply(id: int, temp: Callable):
        for step in range(steps):
            prop = step / steps
            row = (id, step, int(86400 * prop), temp(24 * prop))
            yield row

    log.info('Loading temperatures from netCDF4 files.')
    for tmax_file, tmin_file in files:
        tmaxnc = Dataset(tmax_file, 'r')
        tminnc = Dataset(tmin_file, 'r')

        lons = tmaxnc.variables['lon']
        lats = tmaxnc.variables['lat']
        shape = tmaxnc.variables['tmax'].shape

        tmaxs = tmaxnc.variables['tmax'][day]
        tmins = tminnc.variables['tmin'][day]

        for i in range(shape[1]):
            for j in range(shape[2]):
                tmax = tmaxs[i][j]
                tmin = tmins[i][j]

                # skip cells flagged with the -9999.0 no-data sentinel
                if tmax != -9999.0:
                    x, y = project(lons[i][j], lats[i][j])
                    idx = f'{tmax}-{tmin}'

                    if idx not in profiles:
                        temp = iterpolation(tmin, tmax, 5, 15)
                        temperatures.extend(apply(profile_count, temp))
                        profiles[idx] = profile_count
                        profile_count += 1

                    profile = profiles[idx]
                    point = Point(point_count, x, y, profile)
                    points.append(point)
                    point_count += 1

                    # log progress at powers of two to limit log volume
                    if point_count == n:
                        log.info(
                            f'Loading air temperature reading {point_count}.')
                        n <<= 1

        tmaxnc.close()
        tminnc.close()

    # log the final count unless it was just logged at a power of two
    if point_count != n >> 1:
        log.info(f'Loading air temperature reading {point_count}.')

    # stream (id, bounds, obj) tuples to bulk-load the r-tree
    def load():
        for point in points:
            x, y = point.x, point.y
            yield (point.id, (x, y, x, y), point.profile)

    log.info('Starting network update for air temperatures.')
    log.info('Building spatial index from temperature profile locations.')
    index = Index(load())
    used = set()

    log.info('Loading network links.')
    links = load_links(database)

    log.info('Applying temperature profiles to links.')
    iter_links = counter(links, 'Applying profile to link %s.')
    for link in iter_links:
        result = index.nearest((link.x, link.y, link.x, link.y), objects=True)
        profile = next(result).object
        link.air_temperature = profile
        used.add(profile)

    def dump_links():
        for link in links:
            yield (link.id, link.air_temperature)

    log.info('Writing updated links to database.')
    database.insert_values('temp_links', dump_links(), 2)
    database.connection.commit()
    del links

    log.info('Loading network parcels.')
    parcels = load_parcels(database)

    # constant 26.6667 °C (80 °F) profiles for parcels that have cooling
    residential = profile_count
    temperatures.extend(apply(profile_count, lambda x: 26.6667))
    profile_count += 1
    commercial = profile_count
    temperatures.extend(apply(profile_count, lambda x: 26.6667))
    profile_count += 1
    other = profile_count
    temperatures.extend(apply(profile_count, lambda x: 26.6667))
    profile_count += 1
    used.add(residential)
    used.add(commercial)
    used.add(other)

    log.info('Applying temperature profiles to parcels.')
    iter_parcels = counter(parcels, 'Applying profile to parcel %s.')
    for parcel in iter_parcels:
        if not parcel.cooling:
            x, y = xy(parcel.center)
            result = index.nearest((x, y, x, y), objects=True)
            profile = next(result).object
            parcel.air_temperature = profile
            used.add(profile)
        elif parcel.kind == 'residential':
            parcel.air_temperature = residential
        elif parcel.kind == 'commercial':
            parcel.air_temperature = commercial
        else:
            parcel.air_temperature = other

    def dump_parcels():
        for parcel in parcels:
            yield (parcel.apn, parcel.air_temperature)

    log.info('Writing updated parcels to database.')
    database.insert_values('temp_parcels', dump_parcels(), 2)
    database.connection.commit()
    del parcels

    def dump_temperatures():
        for temp in temperatures:
            if temp[0] in used:
                yield temp

    log.info('Writing parsed air temperatures to database.')
    database.insert_values('air_temperatures', dump_temperatures(), 4)
    database.connection.commit()
    del temperatures

    log.info('Merging, dropping and renaming old tables.')

    query = '''
        CREATE INDEX temp_links_link
        ON temp_links(link_id);
    '''
    database.cursor.execute(query)
    query = '''
        CREATE TABLE temp_links_merged
        AS SELECT
            links.link_id,
            links.source_node,
            links.terminal_node,
            links.length,
            links.freespeed,
            links.capacity,
            links.permlanes,
            links.oneway,
            links.modes,
            temp_links.air_temperature,
            links.mrt_temperature
        FROM links
        INNER JOIN temp_links
        USING(link_id);
    '''
    database.cursor.execute(query)
    query = '''
        CREATE INDEX temp_parcels_parcel
        ON temp_parcels(apn);
    '''
    database.cursor.execute(query)
    query = '''
        CREATE TABLE temp_parcels_merged
        AS SELECT
            parcels.apn,
            parcels.maz,
            parcels.type,
            parcels.cooling,
            temp_parcels.air_temperature,
            parcels.mrt_temperature,
            parcels.center,
            parcels.region
        FROM parcels
        INNER JOIN temp_parcels
        USING(apn);
    '''
    database.cursor.execute(query)

    original = database.count_rows('links')
    merged = database.count_rows('temp_links_merged')
    if original != merged:
        log.error('Original links and updated links tables '
                  'do not align; quitting to prevent data loss.')
        raise RuntimeError
    else:
        database.drop_table('links', 'temp_links')
        query = '''
            ALTER TABLE temp_links_merged
            RENAME TO links;
        '''
        database.cursor.execute(query)

    original = database.count_rows('parcels')
    merged = database.count_rows('temp_parcels_merged')
    if original != merged:
        log.error('Original parcels and updated parcels tables '
                  'do not align; quitting to prevent data loss.')
        raise RuntimeError
    else:
        database.drop_table('parcels', 'temp_parcels')
        query = '''
            ALTER TABLE temp_parcels_merged
            RENAME TO parcels;
        '''
        database.cursor.execute(query)

    database.connection.commit()

    log.info('Creating indexes on new tables.')
    create_indexes(database)

    log.info('Writing process metadata.')
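
A hypothetical invocation (the paths, EPSG codes, and the SqliteUtil constructor are placeholders for the project's own setup):

database = SqliteUtil('network.db')
parse_temperatures(database, tmin_files=['tmin_2019.nc'],
                   tmax_files=['tmax_2019.nc'], steps=96, day=180,
                   src_epsg=4326, prj_epsg=2223)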
Example #5
# Imports assumed by this snippet; to_elevation, _resolve_metric, _passthrough,
# HAS_TQDM, and tqdm come from the surrounding module.
import numpy
import pandas
from sklearn.utils import check_array


def isolation(
    X,
    coordinates,
    metric="euclidean",
    middle="mean",
    return_all=False,
    progressbar=False,
):
    """
    Compute the isolation of each value of X by constructing the distance
    to the nearest higher value in the data.

    Parameters
    ----------
    X : numpy.ndarray
        (N, p) array of data to use as input. If p > 1, the "elevation" is computed
        using the topo.to_elevation function.
    coordinates : numpy.ndarray
        (N,k) array of locations for X to compute distances. If metric='precomputed', this
        should contain the distances from each point to every other point, and k == N.
    metric : string or callable (default: 'euclidean')
        name of distance metric in scipy.spatial.distance, or function, that can be
        used to compute distances between locations. If 'precomputed', ad-hoc function
        will be defined to look up distances between points instead.
    middle : string or callable (default: 'mean')
        method to define the elevation of points. See to_elevation for more details.
    return_all : bool (default: False)
        if False, only return the isolation (distance to nearest higher value).
    progressbar: bool (default: False)
        if True, show a progressbar for the computation.

    Returns
    -------
    either an (N,) array of isolation values, or a pandas DataFrame containing
    the full precedence tree from which the isolation values are derived.
    """
    X = check_array(X, ensure_2d=False)
    X = to_elevation(X, middle=middle).squeeze()
    try:
        from rtree.index import Index as SpatialIndex
    except ImportError:
        raise ImportError(
            "rtree library must be installed to use the isolation measure"
        )
    distance_func = _resolve_metric(X, coordinates, metric)
    sort_order = numpy.argsort(-X)
    tree = SpatialIndex()
    ix = sort_order[0]
    tree.insert(0, tuple(coordinates[ix]), obj=X[ix])
    precedence_tree = [[ix, numpy.nan, 0, numpy.nan, numpy.nan, numpy.nan]]

    if progressbar and HAS_TQDM:
        pbar = tqdm
    elif progressbar and (not HAS_TQDM):
        raise ImportError("the tqdm module is required for progressbars")
    else:
        pbar = _passthrough

    for iter_ix, ix in pbar(enumerate(sort_order[1:])):
        rank = iter_ix + 1
        value = X[ix]
        location = coordinates[
            ix,
        ]
        (match,) = tree.nearest(tuple(location), objects=True)
        higher_rank = match.id
        higher_value = match.object
        higher_location = match.bbox[:2]
        higher_ix = sort_order[higher_rank]
        distance = distance_func(location, higher_location)
        gap = higher_value - value
        precedence_tree.append([ix, higher_ix, rank, higher_rank, distance, gap])
        tree.insert(rank, tuple(location), obj=value)
    # reorder rows from rank order back into the original data order
    precedence_tree = numpy.asarray(precedence_tree)
    out = numpy.empty_like(precedence_tree)
    out[sort_order] = precedence_tree
    result = pandas.DataFrame(
        out,
        columns=["index", "parent_index", "rank", "parent_rank", "isolation", "gap"],
    ).sort_values(["index", "parent_index"])
    if return_all:
        return result
    else:
        return result.isolation.values
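
A quick usage sketch on synthetic data (assuming the module's to_elevation and _resolve_metric helpers are in scope):

import numpy

rng = numpy.random.default_rng(0)
values = rng.random(100)          # one observation per location
coords = rng.random((100, 2))     # planar coordinates
iso = isolation(values, coords)   # (100,) distances to nearest higher value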