Example #1
def get_rtree_index(items, objects=None, properties=None):
    entries = generate_index_entries(items, objects=objects)
    if properties:
        return RTreeIndex(entries, properties=RTreeIndexProperty(**properties))
    return RTreeIndex(entries)
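A minimal usage sketch, assuming RTreeIndex and RTreeIndexProperty wrap rtree.index.Index and rtree.index.Property, and that generate_index_entries yields (id, bounds, obj) tuples suitable for bulk loading; the stand-in generator below is hypothetical:

from rtree.index import Index as RTreeIndex, Property as RTreeIndexProperty

def generate_index_entries(items, objects=None):
    # hypothetical stand-in: items are (id, (minx, miny, maxx, maxy)) pairs
    objects = objects or {}
    for item_id, bounds in items:
        yield item_id, bounds, objects.get(item_id)

items = [(0, (0.0, 0.0, 1.0, 1.0)), (1, (2.0, 2.0, 3.0, 3.0))]
idx = get_rtree_index(items)  # 2D index with default properties
print(list(idx.intersection((0.5, 0.5, 2.5, 2.5))))  # -> [0, 1] (order may vary)

# forwarding a properties dict configures the index, e.g. a 3D R-tree
idx3d = get_rtree_index([(0, (0, 0, 0, 1, 1, 1))], properties={'dimension': 3})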
Example #2
def snap_to_edge_position(gdf, points, k=3, rtree=None):
    """
    Snap given points in the plane to edges in GeoDataFrame of edges.

    Parameters
    ----------
    gdf : GeoDataFrame
        The edges of the spatial network as a GeoDataFrame.
    points : array of floats, shape (M, 2)
        The cartesian coordinates of the points to be snapped.
    k : integer, optional
        Number of nearest edges to consider.
    rtree : RTreeIndex, optional
        Prebuilt spatial index of the edge bounds. If None, the index is
        built from `gdf`.

    Returns
    -------
    nearest_edges : list of integers, length M
        Indices of nearest edges in the GeoDataframe.
    refdistances : list of floats, length M
        Linear referencing distances of points along nearest edge.
    """
    X, Y = points.T
    geom = gdf["geometry"]

    # If not passed, build the r-tree spatial index by position for subsequent iloc
    if rtree is None:
        rtree = RTreeIndex()
        for pos, bounds in enumerate(geom.bounds.values):
            rtree.insert(pos, bounds)

    # use r-tree to find possible nearest neighbors, one point at a time,
    # then minimize euclidean distance from point to the possible matches
    nearest_edges = list()
    refdistances = list()
    for xy in zip(X, Y):
        p = Point(xy)
        dists = geom.iloc[list(rtree.nearest(xy, num_results=k))].distance(p)
        ne = geom[dists.idxmin()]
        nearest_edges.append(dists.idxmin())
        refdistances.append(ne.project(p))

    return nearest_edges, refdistances
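A short usage sketch for the snippet above, assuming the imports it relies on (numpy, geopandas, shapely, and rtree's Index as RTreeIndex); the toy edges and points are illustrative only:

import numpy as np
import geopandas as gpd
from shapely.geometry import LineString, Point
from rtree.index import Index as RTreeIndex

# two horizontal edges at y=0 and y=5
edges = gpd.GeoDataFrame(geometry=[
    LineString([(0, 0), (10, 0)]),
    LineString([(0, 5), (10, 5)]),
])
points = np.array([[2.0, 1.0], [8.0, 4.0]])

nearest, refdist = snap_to_edge_position(edges, points, k=2)
# nearest -> [0, 1]: positional indices of the closest edges
# refdist -> [2.0, 8.0]: distance along each matched edge to the snapped point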
Example #3
    def __init__(self, context: DyCleeContext):
        self.context = context

        self.dense_µclusters: Set[MicroCluster] = set()
        self.semidense_µclusters: Set[MicroCluster] = set()
        self.outlier_µclusters: Set[MicroCluster] = set()
        self.long_term_memory: Set[MicroCluster] = set()
        self.eliminated: Set[MicroCluster] = set()

        self.next_µcluster_index: int = 0
        self.next_class_label: int = 0
        self.n_steps: int = 0
        self.last_partitioning_step: int = 0
        self.last_density_step: int = 0

        if self.context.maintain_rtree:
            p = RTreeProperty(dimension=self.context.n_features)
            self.rtree = RTreeIndex(properties=p)
            # This mapping is used to retrieve microcluster objects from their hashes
            # stored with their locations in the R*-tree
            self.µcluster_map: Optional[dict[int, MicroCluster]] = {}
        else:
            self.rtree = None
            self.µcluster_map = None
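The comment above stores microclusters in the R*-tree keyed by their hashes; a minimal sketch of that pattern as two helper methods on the same class (the method names and the bounds tuple layout are assumptions, not the actual DyClee API):

    def _insert_µcluster(self, µcluster: MicroCluster, bounds: tuple):
        # bounds: (min_1, ..., min_n, max_1, ..., max_n) over n_features dimensions
        if self.rtree is not None:
            key = hash(µcluster)
            self.rtree.insert(key, bounds)
            self.µcluster_map[key] = µcluster

    def _µclusters_in_region(self, bounds: tuple) -> list:
        if self.rtree is None:
            return []
        # map the hashes stored in the R*-tree back to microcluster objects
        return [self.µcluster_map[h] for h in self.rtree.intersection(bounds)]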
Example #4
def nearest_edges(G, X, Y, interpolate=None, return_dist=False):
    """
    Find the nearest edge to a point or to each of several points.

    If `X` and `Y` are single coordinate values, this will return the nearest
    edge to that point. If `X` and `Y` are lists of coordinate values, this
    will return the nearest edge to each point.

    If `interpolate` is None, search for the nearest edge to each point, one
    at a time, using an r-tree and minimizing the euclidean distances from the
    point to the possible matches. For accuracy, use a projected graph and
    points. This method is precise and also fastest if searching for few
    points relative to the graph's size.

    For a faster method if searching for many points relative to the graph's
    size, use the `interpolate` argument to interpolate points along the edges
    and index them. If the graph is projected, this uses a k-d tree for
    euclidean nearest neighbor search, which requires that scipy is installed
    as an optional dependency. If the graph is unprojected, this uses a ball tree
    for haversine nearest neighbor search, which requires that scikit-learn is
    installed as an optional dependency.

    Parameters
    ----------
    G : networkx.MultiDiGraph
        graph in which to find nearest edges
    X : float or list
        points' x (longitude) coordinates, in same CRS/units as graph and
        containing no nulls
    Y : float or list
        points' y (latitude) coordinates, in same CRS/units as graph and
        containing no nulls
    interpolate : float
        spacing distance between interpolated points, in same units as graph.
        smaller values generate more points.
    return_dist : bool
        optionally also return distance between points and nearest edges

    Returns
    -------
    ne or (ne, dist) : tuple or list
        nearest edges as (u, v, key) or optionally a tuple where `dist`
        contains distances between the points and their nearest edges
    """
    is_scalar = False
    if not (hasattr(X, "__iter__") and hasattr(Y, "__iter__")):
        # make coordinates arrays if user passed non-iterable values
        is_scalar = True
        X = np.array([X])
        Y = np.array([Y])

    if np.isnan(X).any() or np.isnan(Y).any():  # pragma: no cover
        raise ValueError("`X` and `Y` cannot contain nulls")
    geoms = utils_graph.graph_to_gdfs(G, nodes=False)["geometry"]

    # if no interpolation distance was provided
    if interpolate is None:

        # build the r-tree spatial index by position for subsequent iloc
        rtree = RTreeIndex()
        for pos, bounds in enumerate(geoms.bounds.values):
            rtree.insert(pos, bounds)

        # use r-tree to find possible nearest neighbors, one point at a time,
        # then minimize euclidean distance from point to the possible matches
        ne_dist = list()
        for xy in zip(X, Y):
            dists = geoms.iloc[list(rtree.nearest(xy))].distance(Point(xy))
            ne_dist.append((dists.idxmin(), dists.min()))
        ne, dist = zip(*ne_dist)

    # otherwise, if interpolation distance was provided
    else:

        # interpolate points along edges to index with k-d tree or ball tree
        uvk_xy = list()
        for uvk, geom in zip(geoms.index, geoms.values):
            uvk_xy.extend(
                (uvk, xy)
                for xy in utils_geo.interpolate_points(geom, interpolate))
        labels, xy = zip(*uvk_xy)
        vertices = pd.DataFrame(xy, index=labels, columns=["x", "y"])

        if projection.is_projected(G.graph["crs"]):
            # if projected, use k-d tree for euclidean nearest-neighbor search
            if cKDTree is None:  # pragma: no cover
                raise ImportError(
                    "scipy must be installed to search a projected graph")
            dist, pos = cKDTree(vertices).query(np.array([X, Y]).T, k=1)
            ne = vertices.index[pos]

        else:
            # if unprojected, use ball tree for haversine nearest-neighbor search
            if BallTree is None:  # pragma: no cover
                raise ImportError(
                    "scikit-learn must be installed to search an unprojected graph"
                )
            # haversine requires lat, lng coords in radians
            vertices_rad = np.deg2rad(vertices[["y", "x"]])
            points_rad = np.deg2rad(np.array([Y, X]).T)
            dist, pos = BallTree(vertices_rad,
                                 metric="haversine").query(points_rad, k=1)
            dist = dist[:, 0] * EARTH_RADIUS_M  # convert radians -> meters
            ne = vertices.index[pos[:, 0]]

    # convert results to correct types for return
    ne = list(ne)
    dist = list(dist)
    if is_scalar:
        ne = ne[0]
        dist = dist[0]

    if return_dist:
        return ne, dist
    else:
        return ne
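A usage sketch, assuming this snippet is the osmnx.distance implementation and osmnx is installed; the place name and query coordinates are illustrative, and building the graph needs network access:

import osmnx as ox

G = ox.graph_from_place('Piedmont, California, USA', network_type='drive')

# single point, r-tree search: returns one (u, v, key) edge tuple
u, v, key = nearest_edges(G, X=-122.231, Y=37.824)

# many points with interpolation (spacing is in graph units, i.e. degrees here);
# the unprojected graph takes the ball-tree path, which needs scikit-learn
ne, dist = nearest_edges(G, X=[-122.231, -122.245], Y=[37.824, 37.828],
                         interpolate=0.0001, return_dist=True)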
Example #5
def main(input_dir, output_dir):
    formatter = logging.Formatter(
        '%(asctime)s %(levelname)s [%(name)s]: %(message)s')
    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    city_names = []
    rtree = RTreeIndex()

    cities_filename = os.path.join(tempfile.gettempdir(), 'cities.json')

    subprocess.check_call([
        'wget',
        'https://raw.githubusercontent.com/mapzen/metroextractor-cities/master/cities.json',
        '-O', cities_filename
    ])

    all_cities = json.load(open(cities_filename))

    i = 0

    for k, v in all_cities['regions'].iteritems():
        for city, data in v['cities'].iteritems():
            bbox = data['bbox']
            rtree.insert(i, (float(bbox['left']), float(bbox['bottom']),
                             float(bbox['right']), float(bbox['top'])))
            city_names.append(city)
            i += 1

    files = {
        name: open(os.path.join(output_dir, 'cities', '{}.geojson'.format(name)), 'w')
        for name in city_names
    }

    planet = open(os.path.join(output_dir, 'planet.geojson'), 'w')
    planet_addresses_only = open(
        os.path.join(output_dir, 'planet_addresses_only.json'), 'w')

    i = 0
    seen = set()

    for url, canonical, venues in gen_venues(input_dir):
        # strip() removes individual characters, not a prefix, so drop "www." explicitly
        netloc = urlparse.urlsplit(url).netloc
        domain = netloc[4:] if netloc.startswith('www.') else netloc
        for props in venues:
            lat = props.get('latitude')
            lon = props.get('longitude')
            props['canonical'] = canonical
            props['url'] = url
            street = props.get('street_address')
            name = props.get('name')
            planet_hash = hashlib.md5(u'|'.join(
                (name, street, str(lat), str(lon),
                 domain)).encode('utf-8')).digest()
            address_hash = hashlib.md5(u'|'.join(
                (name, street, domain)).encode('utf-8')).digest()
            props['guid'] = props.get('guid', random_guid())
            venue = venue_to_geojson(props)
            if lat is not None and lon is not None:
                try:
                    lat = float(lat)
                    lon = float(lon)
                except Exception:
                    lat = None
                    lon = None
            if lat is not None and lon is not None and planet_hash not in seen:
                # a venue may fall inside several overlapping city bounding boxes;
                # write it to each matching city file, not just the last one
                for c in rtree.intersection((lon, lat, lon, lat)):
                    files[city_names[c]].write(json.dumps(venue) + '\n')
                planet.write(json.dumps(venue) + '\n')
                seen.add(planet_hash)
            if address_hash not in seen:
                planet_addresses_only.write(json.dumps(props) + '\n')
                seen.add(address_hash)
            i += 1
            if i % 1000 == 0 and i > 0:
                logger.info('did {}'.format(i))

    logger.info('Creating manifest files')

    manifest_files = []

    for k, v in all_cities['regions'].iteritems():
        for city, data in v['cities'].iteritems():
            f = files[city]
            if f.tell() == 0:
                f.close()
                os.unlink(
                    os.path.join(output_dir, 'cities',
                                 '{}.geojson'.format(city)))
                continue

            bbox = data['bbox']
            lat = midpoint(float(bbox['top']), float(bbox['bottom']))
            lon = midpoint(float(bbox['left']), float(bbox['right']))

            manifest_files.append({
                'latitude': lat,
                'longitude': lon,
                'file': '{}.geojson'.format(city),
                'name': city.replace('_', ', ').replace('-', ' ').title()
            })

    manifest = {'files': manifest_files}

    json.dump(manifest, open(os.path.join(output_dir, 'manifest.json'), 'w'))

    logger.info('Done!')
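The core of the script is the point-in-bounding-box lookup; a standalone sketch of that pattern with made-up city boxes (names and coordinates are illustrative only):

from rtree.index import Index as RTreeIndex

rtree = RTreeIndex()
city_names = []
boxes = [
    ('san_francisco', {'left': -123.0, 'bottom': 37.6, 'right': -122.2, 'top': 37.9}),
    ('oakland', {'left': -122.35, 'bottom': 37.7, 'right': -122.1, 'top': 37.9}),
]
for i, (name, bbox) in enumerate(boxes):
    rtree.insert(i, (float(bbox['left']), float(bbox['bottom']),
                     float(bbox['right']), float(bbox['top'])))
    city_names.append(name)

# a point query is a degenerate box (lon, lat, lon, lat); a venue can fall in
# several overlapping city boxes at once
lon, lat = -122.27, 37.8
print([city_names[c] for c in rtree.intersection((lon, lat, lon, lat))])
# -> ['san_francisco', 'oakland'] (order may vary)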