Exemplo n.º 1
  def setup_rtree(self, ndim, clusters=None):
    """Return the cached spatial index, creating it on first use.

    :param ndim: dimensionality of the cluster bounding boxes.  A falsy
        value selects a stand-in index that matches every cluster.
    :param clusters: optional iterable of clusters to bulk-load; each one
        is indexed by its position using ``self.bbox_rtree(c, enlarge=0.005)``.
    :returns: the index stored in ``self._rtree``.
    """
    if self._rtree:
        return self._rtree

    self._ndim = ndim
    if not ndim:
        class k(object):
            """Index stand-in: ignores updates and reports every cluster id."""

            def __init__(self, graph):
                self.graph = graph

            def insert(self, *args, **kwargs):
                pass

            def delete(self, *args, **kwargs):
                pass

            def intersection(self, *args, **kwargs):
                # ``range`` iterates identically on Python 2 and 3.
                return range(len(self.graph.clusters))

        self._rtree = k(self)
        return self._rtree

    p = RProp()
    # The index is always created with at least two dimensions.
    p.dimension = max(2, ndim)
    p.dat_extension = 'data'
    p.idx_extension = 'index'

    if clusters:
        gen_func = ((i, self.bbox_rtree(c, enlarge=0.005), None)
                    for i, c in enumerate(clusters))
        self._rtree = RTree(gen_func, properties=p)
    else:
        # Without the ``else`` the bulk-loaded tree above was discarded.
        self._rtree = RTree(properties=p)
    return self._rtree
def get_sindex(gdf):
    """Get or build an R-Tree spatial index.

    Particularly useful for geopandas<0.2.0;>0.7.0;0.9.0

    Lookup order:

    1. an index previously cached on ``gdf`` as ``_rtree_sindex``;
    2. the ``sindex`` offered by geopandas, but only if it has a ``nearest``
       method and is not a ``PyGEOSSTRTreeIndex``;
    3. a freshly built ``Index`` over ``gdf.bounds`` when ``rtree`` is
       available and ``len(gdf) >= rtree_threshold`` (cached on ``gdf``).

    Returns the index, or ``None`` when none of the above applies.
    """
    if hasattr(gdf, '_rtree_sindex'):
        return getattr(gdf, '_rtree_sindex')

    sindex = None
    if (isinstance(gdf, geopandas.GeoDataFrame)
            and hasattr(gdf.geometry, 'sindex')):
        sindex = gdf.geometry.sindex
    elif isinstance(gdf, geopandas.GeoSeries) and hasattr(gdf, 'sindex'):
        sindex = gdf.sindex

    if sindex is not None:
        if (hasattr(sindex, "nearest")
                and sindex.__class__.__name__ != "PyGEOSSTRTreeIndex"):
            # probably rtree.index.Index
            return sindex
        else:
            # probably PyGEOSSTRTreeIndex but unfortunately, 'nearest'
            # with 'num_results' is required
            sindex = None

    if rtree and len(gdf) >= rtree_threshold:
        # Manually populate a 2D spatial index for speed
        sindex = Index()
        # slow, but reliable; item[0] is the row label, the rest the bounds
        for idx, item in enumerate(gdf.bounds.itertuples()):
            sindex.add(idx, item[1:])
        # cache the index for later
        setattr(gdf, '_rtree_sindex', sindex)
    return sindex
 def read_airspace(self, airspace):
     # Reads the OpenAir file at path `airspace` and returns an Index in which
     # every zone with bounds is stored under id(zone), with the zone as `obj`.
     # NOTE(review): this block has lost lines (the f-strings below are bare
     # expressions, `except KeyError` has no visible `try`, and the success
     # branch of `if error` has no `else`). Presumably the f-strings were
     # arguments of logging calls - restore from the upstream file.
     index = Index()
     with open(airspace, 'r') as f:
         reader = openair.Reader(f)
         # The reader yields (record, error) pairs.
         for record, error in reader:
             if error:
                     f'line {error.lineno} of {os.path.basename(airspace)} - {error}'
                     zone = Airspace(record)
                     # Zones defined relative to ground need the flight's ground altitude.
                     if not self.agl_validable and (zone.ground_floor
                                                    or zone.ground_ceiling):
                             f'{zone.name} will not be checked because ground altitude of flight could not be retrieved.'
                         if zone.bounds:
                             index.insert(id(zone), zone.bounds, obj=zone)
                 except KeyError:
                         f'line {reader.reader.lineno} of {os.path.basename(airspace)} - error in previous record'
     return index
def create_spatial_index(shape_dict):
    """Build a spatial index over the bounding boxes of ``shape_dict``.

    :param shape_dict: mapping of block id to a shape exposing ``.bounds``.
    :returns: an ``Index`` whose entries are numbered in iteration order and
        carry the block id as their stored object.

    Progress is reported on stderr as ``Making spatial index... done.``.
    """
    # Explicit writes produce the same text as the Python 2 ``print >>``
    # statements did, and also work on Python 3.
    sys.stderr.write('Making spatial index... ')
    spatial_index = Index()
    for index, (blockid, shape) in enumerate(shape_dict.items()):
        spatial_index.insert(index, shape.bounds, obj=blockid)
    sys.stderr.write('done.\n')
    return spatial_index
def create_spatial_index(shape_dict):
    """Index the bounding box of every shape in ``shape_dict``.

    :param shape_dict: mapping of block id to a shape exposing ``.bounds``.
    :returns: an ``Index``; entry ids follow the mapping's iteration order
        and each entry stores its block id as ``obj``.

    Writes ``Making spatial index... done.`` to stderr while working.
    """
    # ``sys.stderr.write`` replaces the Python 2-only ``print >>`` form and
    # emits the same characters on both major Python versions.
    sys.stderr.write('Making spatial index... ')
    spatial_index = Index()
    for index, (blockid, shape) in enumerate(shape_dict.items()):
        spatial_index.insert(index, shape.bounds, obj=blockid)
    sys.stderr.write('done.\n')
    return spatial_index
    def setup_rtree(self, ndim, clusters=None):
        """Return the cached spatial index, building it if necessary.

        :param ndim: dimensionality of the cluster bounding boxes.  A falsy
            value selects a stand-in index that matches every cluster.
        :param clusters: optional iterable of clusters to bulk-load, indexed
            by position via ``self.bbox_rtree(c, enlarge=0.005)``.
        :returns: the index stored in ``self._rtree``.
        """
        if self._rtree:
            return self._rtree

        self._ndim = ndim
        if not ndim:

            class k(object):
                """Index stand-in: ignores updates, returns all cluster ids."""

                def __init__(self, graph):
                    self.graph = graph

                def insert(self, *args, **kwargs):
                    pass

                def delete(self, *args, **kwargs):
                    pass

                def intersection(self, *args, **kwargs):
                    # ``range`` iterates identically on Python 2 and 3.
                    return range(len(self.graph.clusters))

            self._rtree = k(self)
            return self._rtree

        p = RProp()
        # The index is always created with at least two dimensions.
        p.dimension = max(2, ndim)
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        if clusters:
            gen_func = ((i, self.bbox_rtree(c, enlarge=0.005), None)
                        for i, c in enumerate(clusters))
            self._rtree = RTree(gen_func, properties=p)
        else:
            # Without the ``else`` the bulk-loaded tree above was discarded.
            self._rtree = RTree(properties=p)
        return self._rtree
class GeneticLabeler(BaseGeneticLabeler):
    # Labeler that scores an individual (one label-candidate index per
    # labeled point) by the negated sum of its label penalties.
    def __init__(self, points, bounding_box):
        # No extra state: defers entirely to the base-class initializer.
        BaseGeneticLabeler.__init__(self, points, bounding_box)

    def build_index(self):
        # Rebuilds self.items and the spatial index self.idx over item boxes.
        label_candidates = []
        # NOTE(review): the body of this loop and the code that fills
        # self.items appear to be missing; as written the index stays empty.
        for p in self.points:
        self.items = []

        self.idx = Index()
        # An item's list position is stored on it and used as its index id.
        for i, item in enumerate(self.items):
            item.index = i
            self.idx.insert(i, item.box)

    def evaluate_fitness(self, individual):
        # Returns a 1-tuple holding the negated total penalty.
        penalty = 0
        # NOTE(review): the body of this first loop is missing.
        for lpid, pos in enumerate(individual):

        # lpid selects the labeled point, lcid one of its label candidates.
        for lpid, lcid in enumerate(individual):
            lp = self.labeled_points[lpid]
            lc = lp.label_candidates[lcid]

            penalty += evaluate_label(lc, self.items, self.idx, selected_only=True)
        return (-penalty,)
 def __init__(self, resolution, basename=None, overwrite=False):
     # type: (float, str, bool) -> None
     """Create the underlying spatial index.

     :param resolution: stored as ``self._res``.
     :param basename: when ``None`` a plain ``Index`` is created; otherwise
         the value is passed to ``Index`` as its first argument.
     :param overwrite: forwarded to ``Property`` when ``basename`` is given.
     """
     self._res = resolution
     self._cnt = 0
     if basename is None:
         self._index = Index(interleaved=True)
     else:
         # Without the ``else`` the index above was always replaced by
         # ``Index(None, ...)``.
         p = Property(overwrite=overwrite)
         self._index = Index(basename, interleaved=True, properties=p)
 def _construct_index(self):
     """
     Separate the index construction from the constructor, allowing a GUI override

     Builds one triangle per row of self.ikle, stores it in self.triangles
     under its (i, j, k) node triple and inserts its bounds into self.index
     with that triple as the attached object.
     """
     self.index = Index()
     for i, j, k in self.ikle:
         t = Polygon([self.points[i], self.points[j], self.points[k]])
         self.triangles[i, j, k] = t
         # The entry id is the first node number, so ids can repeat; the
         # attached (i, j, k) tuple is what identifies the triangle.
         self.index.insert(i, t.bounds, obj=(i, j, k))
 def _construct_index(self, iter_pbar):
     """
     Separate the index construction from the constructor, allowing a GUI override
     @param iter_pbar: iterable progress bar, called as iter_pbar(iterable, unit='elements')
     """
     self.index = Index()
     for i, j, k in iter_pbar(self.ikle, unit='elements'):
         t = Polygon([self.points[i], self.points[j], self.points[k]])
         self.triangles[i, j, k] = t
         # The entry id is the first node number, so ids can repeat; the
         # attached (i, j, k) tuple is what identifies the triangle.
         self.index.insert(i, t.bounds, obj=(i, j, k))
def compute_indicatormatrix(orig,
    # NOTE(review): this block is damaged. The rest of the signature (the body
    # uses `dest`, `orig_proj` and `dest_proj`), the docstring delimiters, the
    # `try:` before the rtree import and the call that received the warning
    # string are all missing - restore them from the upstream file.
    # NOTE(review): `np.float` below no longer exists in NumPy >= 1.24;
    # the builtin `float` is the usual replacement - confirm when restoring.
    Compute the indicatormatrix

    The indicatormatrix I[i,j] is a sparse representation of the ratio
    of the area in orig[j] lying in dest[i], where orig and dest are
    collections of polygons, i.e.

    A value of I[i,j] = 1 indicates that the shape orig[j] is fully
    contained in shape dest[j].

    Note that the polygons must be in the same crs.

    orig : Collection of shapely polygons
    dest : Collection of shapely polygons

    I : sp.sparse.lil_matrix

    dest = reproject_shapes(dest, dest_proj, orig_proj)
    indicator = sp.sparse.lil_matrix((len(dest), len(orig)), dtype=np.float)

        from rtree.index import Index

        # Fast path: index the bounds of orig, then only test candidate pairs.
        idx = Index()
        for j, o in enumerate(orig):
            idx.insert(j, o.bounds)

        for i, d in enumerate(dest):
            for j in idx.intersection(d.bounds):
                o = orig[j]
                area = d.intersection(o).area
                indicator[i, j] = area / o.area

    except ImportError:
            "Rtree is not available. Falling back to slower algorithm.")

        # Slow path: test every (dest, orig) pair using prepared geometries.
        dest_prepped = list(map(prep, dest))

        for i, j in product(range(len(dest)), range(len(orig))):
            if dest_prepped[i].intersects(orig[j]):
                area = dest[i].intersection(orig[j]).area
                indicator[i, j] = area / orig[j].area

    return indicator
    def build_index(self):
        # Rebuilds self.items and the spatial index self.idx over item boxes.
        label_candidates = []
        # NOTE(review): the body of this loop and the code that fills
        # self.items appear to be missing; as written the index stays empty.
        for p in self.points:
        self.items = []

        self.idx = Index()
        # An item's list position is stored on it and used as its index id.
        for i, item in enumerate(self.items):
            item.index = i
            self.idx.insert(i, item.box)
class StreetIndex(object):
    def __init__(self, streets_file):
        """Load ``streets_file`` (one JSON feature per line) into two indexes.

        ``self.idx`` receives the first and last coordinate of every segment
        of every street (id ``+seg_id`` for the start, ``-seg_id`` for the
        end); ``self.bb_idx`` receives each street's bounding box under its
        integer street id.
        """
        # Pass 1: segment end points.
        self.idx = Index()
        with open(streets_file) as handle:
            for raw in handle:
                feature = json.loads(raw)
                sid = feature['properties']['id']
                shape = asShape(feature['geometry'])
                for pos in range(len(shape.geoms)):
                    seg_id = self.encode_seg_id(pos, sid)
                    coords = shape.geoms[pos].coords
                    self.idx.insert(seg_id, coords[0])
                    self.idx.insert(-seg_id, shape.geoms[pos].coords[-1])

        # Pass 2: whole-street bounding boxes.
        self.bb_idx = Index()
        with open(streets_file) as handle:
            for raw in handle:
                feature = json.loads(raw)
                sid = int(feature['properties']['id'])
                bounds = list(asShape(feature['geometry']).bounds)
                self.bb_idx.insert(sid, bounds)

    def encode_seg_id(self, i, street_id):
        """Combine segment number ``i`` and ``street_id`` into one integer.

        The segment number occupies the millions and above, the street id
        (converted with ``int``) is added on top.
        """
        segment_offset = 1000000 * i
        return segment_offset + int(street_id)

    def decode_seg_id(self, seg_id):
        """Recover the street id from a value made by ``encode_seg_id``.

        The sign is ignored (negative ids mark a segment's end point) and
        the segment number stored in the millions is stripped.

        The previous code subtracted the segment number itself rather than
        ``segment * 1000000``, so it only round-tripped for segment 0.
        """
        return abs(seg_id) % 1000000

    def find_nearest_street(self, shape):
        # Returns, as a string, the id of the first street that
        # self.bb_idx.nearest yields for a reference point of `shape`.
        shape = asShape(shape['geometry'])
        shape_type = shape.geom_type
        # NOTE(review): both `ref_point = (` expressions are truncated and the
        # `else:` between them is missing; how the reference point is derived
        # for polygons versus other geometries cannot be told from here.
        if shape_type == 'Polygon' or shape_type == 'MultiPolygon':
            ref_point = (
            ref_point = (
        street_id = list(self.bb_idx.nearest(ref_point))[0]
        return str(street_id)

    def find_connected_street(self, street):
        """Return the ids of streets touching either end of ``street``.

        The first coordinate of the first segment and the last coordinate of
        the last segment are looked up in ``self.idx``; the matching segment
        ids are decoded to street ids.

        :param street: feature dict with ``properties.id`` and ``geometry``.
        :returns: set of street ids, excluding ``street`` itself.
        """
        street_id = int(street['properties']['id'])
        street_shape = asShape(street['geometry'])
        street_start = street_shape.geoms[0].coords[0]
        street_end = street_shape.geoms[-1].coords[-1]
        seg_ids = list(self.idx.intersection(street_start))
        seg_ids += list(self.idx.intersection(street_end))
        street_ids = set(map(self.decode_seg_id, seg_ids))
        # NOTE(review): the body of the original ``if street_id in
        # street_ids:`` was missing; dropping the street's own id is the
        # assumed intent - confirm against the upstream file.
        street_ids.discard(street_id)
        return street_ids
 def __init__(self, stype, bbox=None, **kwargs):
     """Create the backing index selected by ``stype``.

     :param stype: 'custom', 'quadtree' or 'rtree' (case-insensitive).
     :param bbox: passed to ``QIndex``; required for 'quadtree'.
     :param kwargs: ``custom_index`` (used as-is for 'custom') and
         ``filename`` (passed to ``RIndex`` for 'rtree').
     :raises ValueError: when no index can be created for ``stype``.
     """
     ci = kwargs.pop('custom_index', None)
     self._filename = kwargs.pop('filename', None)
     self._bbox = bbox
     self._stype = stype.lower()
     self._df = None
     if ci and self._stype == 'custom':
         self._index = ci
     elif self._stype == 'quadtree' and bbox:
         self._index = QIndex(bbox=bbox)
     elif self._stype == 'rtree' and RIndex:
         self._index = RIndex(self._filename)
     else:
         # Without the ``else`` the rtree branch always raised and invalid
         # combinations were silently accepted.
         raise ValueError("Could not create the spatial index.")
 def __init__(self, input_header, construct_index=False, iter_pbar=lambda x, unit: x):
     """
     @brief Build the 2D mesh description from a Serafin header
     @param input_header <slf.Serafin.SerafinHeader>: input Serafin header
     @param construct_index <bool>: perform the index construction
     @param iter_pbar: iterable progress bar
     """
     self.x, self.y = input_header.x[:input_header.nb_nodes_2d], input_header.y[:input_header.nb_nodes_2d]
     self.ikle = input_header.ikle_2d - 1  # back to 0-based indexing
     self.triangles = {}
     self.nb_points = self.x.shape[0]
     self.nb_triangles = self.ikle.shape[0]
     self.points = np.stack([self.x, self.y], axis=1)
     if construct_index:
         # Previously nothing happened here, leaving self.index unset.
         # NOTE(review): assumes a sibling _construct_index(iter_pbar) exists.
         self._construct_index(iter_pbar)
     else:
         self.index = Index()
class Mesh2D:
    """
    The general representation of mesh in Serafin 2D.
    The basis for interpolation, volume calculations etc.
    """
    def __init__(self, input_header, construct_index=False,
                 iter_pbar=lambda x, unit=None: x):
        """
        @brief Build the 2D mesh description from a Serafin header
        @param input_header <slf.Serafin.SerafinHeader>: input Serafin header
        @param construct_index <bool>: perform the index construction
        @param iter_pbar: iterable progress bar, called as iter_pbar(iterable, unit=...)
        """
        # NOTE(review): the leading parameters and the y slice were truncated
        # in this copy; they are restored from the @param list and the x slice.
        nb_nodes = input_header.nb_nodes_2d
        self.x, self.y = input_header.x[:nb_nodes], input_header.y[:nb_nodes]
        self.ikle = input_header.ikle_2d - 1  # back to 0-based indexing
        self.triangles = {}
        self.nb_points = self.x.shape[0]
        self.nb_triangles = self.ikle.shape[0]
        self.points = np.stack([self.x, self.y], axis=1)
        if construct_index:
            # Previously nothing happened here, leaving self.index unset.
            self._construct_index(iter_pbar)
        else:
            self.index = Index()

    def _construct_index(self, iter_pbar):
        """
        Separate the index construction from the constructor, allowing a GUI override
        @param iter_pbar: iterable progress bar
        """
        self.index = Index()
        for i, j, k in iter_pbar(self.ikle, unit='elements'):
            t = Polygon([self.points[i], self.points[j], self.points[k]])
            self.triangles[i, j, k] = t
            # The entry id is the first node number, so ids can repeat; the
            # attached (i, j, k) tuple is what identifies the triangle.
            self.index.insert(i, t.bounds, obj=(i, j, k))

    def get_intersecting_elements(self, bounding_box):
        """
        @brief Return the triangles in the mesh intersecting the bounding box
        @param bounding_box <tuple>: (left, bottom, right, top) of a 2d geometrical object
        @return <[tuple]>: The list of triangles (i,j,k) intersecting the bounding box
        """
        return list(self.index.intersection(bounding_box, objects='raw'))
    def create_rtree(self, clusters):
        """Build a spatial index over the bounding boxes of ``clusters``.

        Each cluster's ``bbox`` is a (mins, maxs) pair of tuples.  An extra
        dimension spanning 0..1 is appended to every box, so the index has
        one dimension more than the data.

        When the boxes are zero-dimensional, a stand-in object is returned
        whose ``intersection`` yields every cluster position.

        :param clusters: non-empty sequence of clusters with a ``bbox``.
        :returns: the populated index, or the stand-in described above.
        """
        if not len(clusters[0].bbox[0]):
            class k(object):
                """Index stand-in that matches every cluster."""

                def intersection(self, foo):
                    # ``range`` iterates identically on Python 2 and 3.
                    return range(len(clusters))
            return k()

        ndim = len(clusters[0].bbox[0]) + 1
        p = RProp()
        p.dimension = ndim
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        rtree = RTree(properties=p)
        for idx, c in enumerate(clusters):
            rtree.insert(idx, c.bbox[0] + (0,) + c.bbox[1] + (1,))
        return rtree
    def test_tpr(self):
        # Replays INSERT / DELETE / QUERY operations from data_generator()
        # against a TPR-tree and checks each query against a brute-force scan.
        # NOTE(review): several statements are missing from this copy (the
        # body of the Windows guard, the argument of `set(` for
        # tree_intersect, and the body of the `if intersects(...)` test).
        # TODO : this freezes forever on some windows cloud builds
        if os.name == 'nt':

        # Cartesians list for brute force
        objects = dict()
        tpr_tree = Index(properties=Property(type=RT_TPRTree))

        for operation, t_now, object_ in data_generator():
            if operation == "INSERT":
                tpr_tree.insert(object_.id, object_.get_coordinates())
                objects[object_.id] = object_
            elif operation == "DELETE":
                tpr_tree.delete(object_.id, object_.get_coordinates(t_now))
                del objects[object_.id]
            elif operation == "QUERY":
                tree_intersect = set(

                # Brute intersect
                brute_intersect = set()
                for tree_object in objects.values():
                    x_low, y_low = tree_object.getXY(object_.start_time)
                    x_high, y_high = tree_object.getXY(object_.end_time)

                    if intersects(
                            x_low, y_low, x_high, y_high,  # Line
                            object_.x, object_.y, object_.dx, object_.dy):  # Rect

                # Tree should match brute force approach
                assert tree_intersect == brute_intersect
def demo_delete():
    """Show that deleting entries from an index removes them from queries.

    Every country polygon is inserted under a fresh uuid with the country
    name as payload, random points are generated inside each polygon, and
    every point is checked to hit its country.  The polygons of one country
    are then deleted and the points that no longer hit their country are
    printed.
    """
    seed = 1  # Seed for random points

    countries = get_countries()

    country_id_to_remove = 170  # United States of America
    country_uuids_to_remove = []  # Polygons' ids to remove from the index

    properties = Property()
    # properties.writethrough = True
    # properties.leaf_capacity = 1000
    # properties.fill_factor = 0.5
    index = Index(properties=properties)

    points_per_polygon = 1
    points = []

    # Inserts countries data to the index
    for i, (country_name, geometry) in enumerate(countries):
        for polygon in get_polygons(geometry):
            # NOTE(review): uuid1().int is a 128-bit value; confirm the
            # index accepts ids that large without truncation.
            temp_uuid = uuid.uuid1().int
            index.insert(temp_uuid, polygon.bounds, country_name)

            if i == country_id_to_remove:
                # Saves index ids of the polygon to be removed later.
                # This append was missing, so the delete loop below raised
                # IndexError on an empty list.
                country_uuids_to_remove.append(temp_uuid)

            # Generates random points in every polygon and saves them
            random_points = gen_random_point(points_per_polygon, polygon, seed)
            points.append((country_name, random_points))

    # Checks every generated point has matches
    for (country_name, country_points) in points:
        for point in country_points:
            hits = list(index.intersection(point.bounds, objects=True))
            assert any(hit.object == country_name for hit in hits)

    # Remove geometry; polygons come back in insertion order, so position i
    # pairs each polygon with the uuid recorded for it above.
    geometry = countries[country_id_to_remove][1]
    for i, polygon in enumerate(get_polygons(geometry)):
        index.delete(country_uuids_to_remove[i], polygon.bounds)

    points_missing = []

    # Checks (again) if every generated point has matches
    for (country_name, country_points) in points:
        for point in country_points:
            hits = list(index.intersection(point.bounds, objects=True))
            # Save any point without matches
            if not any(hit.object == country_name for hit in hits):
                points_missing.append(str(point) + " - " + country_name)

    # Print missing points
    for point in points_missing:
        print(point)
def get_sindex(gdf):
    """Helper function to get or build a spatial index

    Particularly useful for geopandas<0.2.0

    Returns ``gdf.geometry.sindex`` when ``gdf`` has a ``sindex`` attribute.
    Otherwise, if ``rtree`` is available and ``len(gdf) >= rtree_threshold``,
    an ``Index`` over the rows of ``gdf.bounds`` is built, numbered by
    position.  Returns ``None`` when neither applies.
    """
    assert isinstance(gdf, geopandas.GeoDataFrame)
    if hasattr(gdf, 'sindex'):
        sindex = gdf.geometry.sindex
    elif rtree and len(gdf) >= rtree_threshold:
        # Manually populate a 2D spatial index for speed
        sindex = Index()
        # slow, but reliable
        for idx, (segnum, row) in enumerate(gdf.bounds.iterrows()):
            sindex.add(idx, tuple(row))
    else:
        # Without the ``else`` the index built above was thrown away.
        sindex = None
    return sindex
    def create_rtree(self, clusters):
        """Index the bounding boxes of ``clusters``.

        Each cluster's ``bbox`` is a (mins, maxs) pair of tuples.  Every box
        gets one extra dimension spanning 0..1 appended, so the index has one
        dimension more than the data.

        For zero-dimensional boxes a stand-in object is returned instead; its
        ``intersection`` yields every cluster position.

        :param clusters: non-empty sequence of clusters with a ``bbox``.
        :returns: the populated index, or the stand-in described above.
        """
        if not len(clusters[0].bbox[0]):

            class k(object):
                """Index stand-in that matches every cluster."""

                def intersection(self, foo):
                    # ``range`` iterates identically on Python 2 and 3.
                    return range(len(clusters))

            return k()

        ndim = len(clusters[0].bbox[0]) + 1
        p = RProp()
        p.dimension = ndim
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        rtree = RTree(properties=p)
        for idx, c in enumerate(clusters):
            rtree.insert(idx, c.bbox[0] + (0, ) + c.bbox[1] + (1, ))
        return rtree
    def __init__(self, streets_file):
        """Read ``streets_file`` (one JSON feature per line) into two indexes.

        ``self.idx`` holds the first and last coordinate of every segment of
        every street (id ``+seg_id`` for the start, ``-seg_id`` for the end);
        ``self.bb_idx`` holds each street's bounding box under its integer
        street id.
        """
        # Pass 1: segment end points.
        self.idx = Index()
        with open(streets_file) as handle:
            for raw in handle:
                feature = json.loads(raw)
                sid = feature['properties']['id']
                shape = asShape(feature['geometry'])
                for pos in range(len(shape.geoms)):
                    seg_id = self.encode_seg_id(pos, sid)
                    coords = shape.geoms[pos].coords
                    self.idx.insert(seg_id, coords[0])
                    self.idx.insert(-seg_id, shape.geoms[pos].coords[-1])

        # Pass 2: whole-street bounding boxes.
        self.bb_idx = Index()
        with open(streets_file) as handle:
            for raw in handle:
                feature = json.loads(raw)
                sid = int(feature['properties']['id'])
                bounds = list(asShape(feature['geometry']).bounds)
                self.bb_idx.insert(sid, bounds)
    def construct_rtree(self, clusters):
        """Build a spatial index over the bounding boxes of ``clusters``.

        The index has at least two dimensions.  One-dimensional boxes are
        padded with a dummy second dimension spanning 0..1.  Zero-dimensional
        boxes get a stand-in object whose ``intersection`` yields every
        cluster position.

        :param clusters: non-empty sequence of clusters exposing ``bbox``
            (a (mins, maxs) pair) and ``centroid``.
        :returns: the populated index, or the stand-in described above.
        """
        if not len(clusters[0].bbox[0]):
            class k(object):
                """Index stand-in that matches every cluster."""

                def intersection(self, foo):
                    # ``range`` iterates identically on Python 2 and 3.
                    return range(len(clusters))
            return k()

        data_dim = len(clusters[0].centroid)
        ndim = max(2, data_dim)
        p = RProp()
        p.dimension = ndim
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        rtree = RTree(properties=p)
        for idx, c in enumerate(clusters):
            box = c.bbox
            # The old test was ``ndim == 1``, which can never hold after
            # ``max(2, ...)``; and without its ``else`` both inserts ran.
            if data_dim == 1:
                rtree.insert(idx, list(box[0]) + [0] + list(box[1]) + [1])
            else:
                rtree.insert(idx, box[0] + box[1])
        return rtree
def snap_to_edge_position(gdf, points, k=3, rtree=None):
    """
    Snap given points in the plane to edges in GeoDataFrame of edges.

    Parameters
    ----------
    gdf : GeoDataframe
        The edges of spatial network as a Geodataframe.
    points : array of floats, shape (M, 2)
        The cartesian coordinates of the points to be snapped.
    k : integer, optional
        Number of nearest edges to consider.
    rtree : spatial index, optional
        Index over ``gdf["geometry"].bounds`` keyed by row position.
        Built here when not supplied.

    Returns
    -------
    nearest_edges : list of integers, length M
        Indices of nearest edges in the GeoDataframe.
    refdistances : list of floats, length M
        Linear referencing distances of points along nearest edge.
    """
    X, Y = points.T
    geom = gdf["geometry"]

    # If not passed, build the r-tree spatial index by position for subsequent iloc
    if rtree is None:
        rtree = RTreeIndex()
        for pos, bounds in enumerate(geom.bounds.values):
            rtree.insert(pos, bounds)

    # use r-tree to find possible nearest neighbors, one point at a time,
    # then minimize euclidean distance from point to the possible matches
    nearest_edges = list()
    refdistances = list()
    for xy in zip(X, Y):
        p = Point(xy)
        dists = geom.iloc[list(rtree.nearest(xy, num_results=k))].distance(p)
        nearest = dists.idxmin()
        ne = geom[nearest]
        # NOTE(review): these two appends were missing, so the function
        # always returned empty lists.  They are reconstructed from the
        # documented return values - confirm against the upstream file.
        nearest_edges.append(nearest)
        refdistances.append(ne.project(p))

    return nearest_edges, refdistances
def get_rtree(geometries, fp):
    """Open the on-disk R-tree at ``fp``, creating it first if needed.

    :param geometries: sequence of geometries exposing ``.bounds``; entry
        ids are their positions in the sequence.
    :param fp: path object (``as_posix`` is called on it) naming the index
        files without extension.
    :returns: ``{'rtree': Index, 'geometries': geometries}``.
    """
    fp = fp.as_posix()
    if not os.path.exists(fp + '.idx'):
        # Populate R-tree index with bounds of geometries
        print('Populate {} tree'.format(fp))
        idx = Index(fp)
        for i, geo in enumerate(geometries):
            idx.insert(i, geo.bounds)
        # Close the writer so its contents reach the files before they are
        # reopened below; previously both handles were open at once.
        idx.close()

    return {'rtree': Index(fp), 'geometries': geometries}
 def build_rtree(self):
     """
     Construct an R-tree for the domain. This may reduce the
     computational complexity of the methods `intersection_count`,
     `contains`, `orient_simplices`, and `snap`.

     Does nothing if `self.rtree` already exists.
     """
     if self.rtree is not None:
         # do nothing because the R-tree already exists
         logger.debug('R-tree already exists')
         # This return was missing, so the tree was rebuilt on every call.
         return

     # create a bounding box for each simplex and add those
     # bounding boxes to the R-tree
     smp_min = self.vertices[self.simplices].min(axis=1)
     smp_max = self.vertices[self.simplices].max(axis=1)
     bounds = np.hstack((smp_min, smp_max))
     p = Property()
     p.dimension = self.dim
     self.rtree = Index(properties=p)
     for i, bnd in enumerate(bounds):
         self.rtree.add(i, bnd)
    def build_cache(self):
        # Precomputes, for every label candidate, a base penalty and a list
        # of per-label overlap penalties, using a spatial index of item boxes.
        # NOTE(review): several lines are missing from this copy (the body of
        # the first loop, whatever fills `items`, and the bodies of three `if`
        # statements below), so the control flow cannot be fully described.
        label_candidates = []
        for p in self.points:
        items = []

        idx = Index()
        # An item's list position is stored on it and used as its index id.
        for i, item in enumerate(items):
            item.index = i
            idx.insert(i, item.box)

        for lc in label_candidates:
            # Base penalty grows with the candidate's position value.
            lc.penalty = POSITION_WEIGHT * lc.position
            lc.label_penalties = [0 for i in range(len(label_candidates))]
            intersecting_item_ids = idx.intersection(lc.box)
            bbox_counted = False

            for item_id in intersecting_item_ids:
                item = items[item_id]

                if item == lc or item == lc.point:

                if isinstance(item, Label):
                    if lc.point == item.point:
                        lc.label_penalties[item.index] = item.overlap(lc)

                if isinstance(item, BoundingBoxBorder):
                    if bbox_counted:
                    bbox_counted = True

                lc.penalty += item.overlap(lc)
class PolyStore(object):
    """Shapefile polygons with a bounding-box index for point lookups."""

    def __init__(self):
        self.index = Index()

    def load_from_shapefile(self, sf):
        """Load shapes and records from ``sf`` and index every shape's bbox.

        Shapes with more than one part are reported on stdout as
        ``<record> <number of parts>``.
        """
        self.shapes = sf.shapes()
        self.records = sf.records()
        for index, shape in enumerate(self.shapes):
            if len(shape.parts) > 1:
                # Same output as the Python 2 ``print a, b`` statement.
                print('%s %s' % (self.records[index], len(shape.parts)))
            self.index.insert(index, shape.bbox)

    def get_shape_at_point(self, point):
        """Return field 4 of the record whose shape contains ``point``.

        :param point: ``(x, y)`` pair (previously a Python 2 tuple parameter).
        :returns: ``'0'`` when the containing part is listed in
            ``SHAPE_LAND`` for that record value, otherwise the record value;
            ``None`` when no part of any candidate contains the point.
        """
        x, y = point
        # The index only filters by bounding box; confirm with real geometry.
        for candidate in self.index.intersection((x, y, x, y)):
            shape = self.shapes[candidate]
            for i, part in enumerate(shape_to_parts_list(shape)):
                if Polygon(part).contains(Point(x, y)):
                    if i in SHAPE_LAND.get(self.records[candidate][4], []):
                        return '0'
                    return self.records[candidate][4]
        return None
class PolyStore(object):
    """Holds shapefile polygons plus a bounding-box index for lookups."""

    def __init__(self):
        self.index = Index()

    def load_from_shapefile(self, sf):
        """Read shapes and records from ``sf`` and index each shape's bbox.

        Multi-part shapes are reported on stdout as
        ``<record> <number of parts>``.
        """
        self.shapes = sf.shapes()
        self.records = sf.records()
        for index, shape in enumerate(self.shapes):
            if len(shape.parts) > 1:
                # Same output as the Python 2 ``print a, b`` statement.
                print('%s %s' % (self.records[index], len(shape.parts)))
            self.index.insert(index, shape.bbox)

    def get_shape_at_point(self, point):
        """Look up the record value (field 4) of the shape at ``point``.

        :param point: ``(x, y)`` pair (previously a Python 2 tuple parameter).
        :returns: ``'0'`` when the containing part is listed in
            ``SHAPE_LAND`` for that record value, otherwise the record value;
            ``None`` when nothing contains the point.
        """
        x, y = point
        # The index only filters by bounding box; confirm with real geometry.
        for candidate in self.index.intersection((x, y, x, y)):
            shape = self.shapes[candidate]
            for i, part in enumerate(shape_to_parts_list(shape)):
                if Polygon(part).contains(Point(x, y)):
                    if i in SHAPE_LAND.get(self.records[candidate][4], []):
                        return '0'
                    return self.records[candidate][4]
        return None
def local_search(points, bounding_box, iterations):
    # For `iterations` rounds, picks for every labeled point the label
    # candidate with the lowest penalty (position weight plus overlaps with
    # indexed items).
    # NOTE(review): statements are missing from this copy: the body of the
    # `if hasattr(...)` test, the line that selects the new label, and
    # presumably the update of `items` to match the id `len(items)` used in
    # the final insert. `bounding_box` is not used in what remains.
    labeled_points = [p for p in points if p.text]

    items = []
    items.extend([p.label for p in labeled_points])

    idx = Index()
    # An item's list position is stored on it and used as its index id.
    for i, item in enumerate(items):
        item.index = i
        idx.insert(item.index, item.box)

    for i in range(iterations):
        for lp in labeled_points:
            best_candidate = None
            min_penalty = None
            for lc1 in lp.label_candidates:
                penalty = POSITION_WEIGHT * lc1.position

                # Check overlap with other labels and points
                intersecting_item_ids = idx.intersection(lc1.box)
                for item_id in intersecting_item_ids:
                    item = items[item_id]
                    if hasattr(item, "point") and lc1.point == item.point:
                    penalty += item.overlap(lc1)

                if min_penalty is None or penalty < min_penalty:
                    min_penalty = penalty
                    best_candidate = lc1

            # Remove the old label from the index
            idx.delete(lp.label.index, lp.label.box)

            # Select the new label

            # Add the new label to the index and item list
            idx.insert(len(items), lp.label.box)
def evaluate_labels(labels, points, bounding_box):
    """Return the penalty of every label in ``labels`` and print the time taken.

    :param labels: labels to score with ``evaluate_label``.
    :param points: not used by the code visible here.
    :param bounding_box: not used by the code visible here.
    :returns: list of penalties, one per label, in input order.
    """
    # NOTE(review): nothing is added to ``items`` in this copy, so the index
    # below is empty; the lines that populated it appear to be missing.
    items = []

    # ``time.clock`` was removed in Python 3.8; ``perf_counter`` is the
    # documented replacement for measuring elapsed time.
    t1 = time.perf_counter()
    idx = Index()

    for i, item in enumerate(items):
        item.index = i
        idx.insert(i, item.box)

    # Update penalties for overlap with other objects
    penalties = [evaluate_label(l, items, idx) for l in labels]

    t3 = time.perf_counter()

    print(f"Total time: {t3 - t1}")
    return penalties
    def flush(self):
        """
        Saves the index to disk if a filename is given for an R-Tree Spatial Index.

        **This applies only to the R-Tree implementation of the spatial index.**

        :returns: Boolean - True when the index was flushed or reopened,
                  False when nothing could be done.
        """
        if hasattr(self._index, 'flush'):
            self._index.flush()
        elif self._stype == 'rtree' and self._filename:
            # NOTE(review): the second half of this condition was missing;
            # the filename check is assumed from the docstring - confirm.
            self._index = RIndex(self._filename)
        else:
            return False
        return True
def make_index(shapes):
    """Creates an index for fast and efficient spatial queries.

    Args:
      shapes: shapely shapes to bulk-insert bounding boxes for into the spatial index.

    Returns:
      The spatial index created from the shape's bounding boxes.
    """

    # Todo: benchmark these for our use-cases
    prop = Property()
    prop.dimension = 2
    prop.leaf_capacity = 1000
    prop.fill_factor = 0.9

    def bounded():
        # Stream (id, bounds, object) triples; the id is the shape's position
        # in ``shapes`` and no object is stored.
        for i, shape in enumerate(shapes):
            yield (i, shape.bounds, None)

    return Index(bounded(), properties=prop)
    def __init__(self, context: DyCleeContext):
        """Initialise empty microcluster collections, counters and the R-tree.

        :param context: configuration; ``maintain_rtree`` decides whether an
            R-tree with ``n_features`` dimensions is kept.
        """
        self.context = context

        self.dense_µclusters: Set[MicroCluster] = Set()
        self.semidense_µclusters: Set[MicroCluster] = Set()
        self.outlier_µclusters: Set[MicroCluster] = Set()
        self.long_term_memory: Set[MicroCluster] = Set()
        self.eliminated: Set[MicroCluster] = Set()

        self.next_µcluster_index: int = 0
        self.next_class_label: int = 0
        self.n_steps: int = 0
        self.last_partitioning_step: int = 0
        self.last_density_step: int = 0

        if self.context.maintain_rtree:
            p = RTreeProperty(dimension=self.context.n_features)
            self.rtree = RTreeIndex(properties=p)
            # This mapping is used to retrieve microcluster objects from their hashes
            # stored with their locations in the R*-tree
            self.µcluster_map: Optional[dict[int, MicroCluster]] = {}
        else:
            # Without the ``else`` the R-tree and map built above were
            # always reset to None.
            self.rtree = None
            self.µcluster_map = None
class SpatialIndex():
    """
    A spatial index is a type of extended index that allows you to index a
    spatial column. A spatial column is a table column that contains data of a
    spatial data type.

    Spatial indexes help to improve spatial query performance on a dataframe.
    Identifying a feature, selecting features, and joining data all have better
    performance when using spatial indexing.

    ====================     ==================================================
    Argument                 Description
    --------------------     --------------------------------------------------
    stype                    Required String. This sets the type of spatial
                             index being used by the user. The current types of
                             spatial indexes are: custom, rtree and quadtree.
    --------------------     --------------------------------------------------
    bbox                     Optional Tuple. The extent of the spatial data as:
                             (xmin, ymin, xmax, ymax). This parameter is required
                             if a QuadTree Spatial Index is being used.

                             Example:
                             bbox=(-100, -50, 100, 50)
    --------------------     --------------------------------------------------
    filename                 Optional String. The name of the spatial index
                             file. This is only supported by rtree spatial
                             indexes. For large datasets an rtree index can be
                             saved to disk and used at a later time. If this is
                             not provided the r-tree index will be in-memory.
    --------------------     --------------------------------------------------
    custom_index             Optional Object. Sometimes QuadTree and Rtree
                             indexing is not enough. A custom spatial index class
                             can be given to the SpatialIndex class and used
                             using encapsulation.  The custom index must have two
                             methods: `intersect` that accepts a tuple, and
                             `insert` which must accept an oid and a bounding
                             box. This object is required when `stype` of
                             'custom' is specified.
    ====================     ==================================================
    """
    _stype = None
    _bbox = None
    _index = None
    _df = None

    def __init__(self, stype, bbox=None, **kwargs):
        """Create the backing index; see the class docstring for arguments.

        :raises ValueError: when no index can be created for ``stype``.
        """
        ci = kwargs.pop('custom_index', None)
        self._filename = kwargs.pop('filename', None)
        self._bbox = bbox
        self._stype = stype.lower()
        self._df = None
        if ci and self._stype == 'custom':
            self._index = ci
        elif self._stype == 'quadtree' and bbox:
            self._index = QIndex(bbox=bbox)
        elif self._stype == 'rtree' and RIndex:
            self._index = RIndex(self._filename)
        else:
            # Without the ``else`` the rtree branch always raised and invalid
            # combinations were silently accepted.
            raise ValueError("Could not create the spatial index.")

    def intersect(self, bbox):
        """
        Returns the spatial features that intersect the bbox

        :bbox: tuple - (xmin,ymin,xmax,ymax)

        :returns: list
        """
        if self._stype.lower() in ['rtree']:
            return list(self._index.intersection(bbox))
        elif self._stype.lower() in ['quadtree']:
            return list(self._index.intersect(bbox=bbox))
        else:
            return list(self._index.intersect(bbox))

    def insert(self, oid, bbox):
        """
        Inserts the entry into the spatial index

        :oid: unique id
        :bbox: tuple - (xmin,ymin,xmax,ymax)

        :returns: whatever the backing index's insert returns
        """
        if self._index is None:
            raise Exception(("Could not insert into a spatial index because "
                             "it does not exist."))
        if self._stype == 'rtree' and \
           HASRTREE and \
           isinstance(self._index, RIndex):

            r = self._index.insert(id=oid, coordinates=bbox, obj=None)
            return r
        elif self._stype.lower() == 'quadtree':
            return self._index.insert(item=oid, bbox=bbox)
        elif self._stype.lower() == 'custom':
            # Previously called ``intersect(oid, bbox)``, which never stored
            # the entry; the class contract requires ``insert(oid, bbox)``.
            r = self._index.insert(oid, bbox)
            return r

    def flush(self):
        """
        Saves the index to disk if a filename is given for an R-Tree Spatial Index.

        **This applies only to the R-Tree implementation of the spatial index.**

        :returns: Boolean - True when the index was flushed or reopened,
                  False when nothing could be done.
        """
        if hasattr(self._index, 'flush'):
            self._index.flush()
        elif self._stype == 'rtree' and self._filename:
            # NOTE(review): the second half of this condition was missing;
            # the filename check is assumed from the docstring - confirm.
            self._index = RIndex(self._filename)
        else:
            return False
        return True
 def __init__(self):
     # Start with an empty Index created with default arguments.
     self.index = Index()
class AdjacencyGraph(object):
    """Undirected adjacency graph over clusters with a spatial index.

    Clusters are stored in ``self.clusters``; a cluster's position in that
    list is its id in the spatial index (``self._rtree``).  Removed clusters
    leave a ``None`` hole so that ids stay stable.  ``self.graph`` maps a
    cluster to the set of clusters adjacent to it.

    Cluster objects must be hashable and expose ``bbox`` (a pair of
    lower/upper coordinate sequences), ``adjacent``, ``contains`` and
    ``discretes_contains``.
    """

    def __init__(self, clusters, partitions_complete=True):
        """Create the graph and, when ``clusters`` is non-empty, index them.

        :param clusters: initial clusters (may be empty or None)
        :param partitions_complete: when False, ``neighbors`` returns every
            live cluster instead of consulting the graph
        """
        self.partitions_complete = partitions_complete
        self.graph = defaultdict(set)
        self.cid = 0
        self.clusters = []
        self.id2c = dict()
        self.c2id = dict()
        self._rtree = None  # internal datastructure
        self._ndim = None

        # The constructor used to ignore ``clusters`` entirely.
        if clusters:
            self.bulk_init(clusters)

    def to_json(self):
        """Serialize the graph state (not the spatial index) to a JSON string."""
        data = {
            'clusters': [c.__dict__ if c else None for c in self.clusters],
            'id2c': [(key, c.__dict__) for key, c in self.id2c.items()],
            'c2id': [(c.__dict__, val) for c, val in self.c2id.items()],
            'graph': [(key.__dict__, [val.__dict__ for val in vals])
                      for key, vals in self.graph.items()],
            'cid': self.cid,
            '_ndim': self._ndim,
            '_rtreename': 'BLAH',
        }
        return json.dumps(data)

    def from_json(self, encoded):
        """Restore the state written by ``to_json``.

        The spatial index is not serialized; ``self._rtree`` is reset to
        None and is created again (empty) on the next index operation.
        """
        data = json.loads(encoded)
        self.clusters = [Cluster.from_dict(c) if c else None
                         for c in data['clusters']]
        self.id2c = dict((key, Cluster.from_dict(val))
                         for key, val in data['id2c'])
        self.c2id = dict((Cluster.from_dict(key), val)
                         for key, val in data['c2id'])
        # Keep the same container types that __init__ sets up so that
        # insert()/remove() keep working on a restored graph.
        graph = defaultdict(set)
        for key, vals in data['graph']:
            graph[Cluster.from_dict(key)] = set(
                Cluster.from_dict(val) for val in vals)
        self.graph = graph
        self.cid = data['cid']
        self._ndim = data['_ndim']
        self._rtree = None

    def setup_rtree(self, ndim, clusters=None):
        """Return the spatial index, creating it on first use.

        :param ndim: number of dimensions of the cluster bounding boxes
        :param clusters: optional clusters to bulk-load; their list position
            is used as the index id
        """
        if self._rtree is not None:
            return self._rtree

        self._ndim = ndim
        if not ndim:
            # Zero-dimensional boxes cannot be indexed: use a stand-in whose
            # query matches every cluster id.
            class k(object):
                def __init__(self, graph):
                    self.graph = graph

                def insert(self, *args, **kwargs):
                    pass

                def delete(self, *args, **kwargs):
                    pass

                def intersection(self, *args, **kwargs):
                    return range(len(self.graph.clusters))

            self._rtree = k(self)
            return self._rtree

        p = RProp()
        # The index is created with at least two dimensions; bbox_rtree()
        # pads one-dimensional boxes to match.
        p.dimension = max(2, ndim)
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        if clusters:
            gen_func = ((i, self.bbox_rtree(c, enlarge=0.00001), None)
                        for i, c in enumerate(clusters))
            self._rtree = RTree(gen_func, properties=p)
        else:
            # Without this ``else`` the bulk-loaded tree above was replaced
            # by an empty one.
            self._rtree = RTree(properties=p)
        return self._rtree

    def bbox_rtree(self, cluster, enlarge=0.):
        """Return ``cluster.bbox`` flattened to ``lower + higher``.

        :param enlarge: absolute margin subtracted from every lower bound
            and added to every upper bound
        """
        lower, higher = map(list, cluster.bbox)
        if self._ndim == 1:
            # NOTE(review): pads the missing second dimension with [0, 1];
            # the original padding values were lost -- confirm.
            lower.append(0)
            higher.append(1)

        # The previous test was ``enlarge != 1.``, which skipped the margin
        # exactly when a margin of 1 was requested.
        if enlarge:
            lower = [v - enlarge for v in lower]
            higher = [v + enlarge for v in higher]

        return lower + higher

    def insert_rtree(self, idx, cluster):
        """Add ``cluster`` to the spatial index under id ``idx``."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.insert(idx, self.bbox_rtree(cluster))
        return cluster

    def remove_rtree(self, idx, cluster):
        """Delete ``cluster`` (id ``idx``) from the spatial index."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.delete(idx, self.bbox_rtree(cluster))
        return cluster

    def search_rtree(self, cluster):
        """Return the live clusters whose box intersects ``cluster``'s box."""
        self.setup_rtree(len(cluster.bbox[0]))
        bbox = self.bbox_rtree(cluster, enlarge=0.00001)
        res = [self.clusters[idx] for idx in self._rtree.intersection(bbox)]
        # Removed clusters are None holes in self.clusters.
        return [c for c in res if c]

    def bulk_init(self, clusters):
        """Load ``clusters`` into an empty graph and connect adjacent pairs."""
        if clusters:
            self.setup_rtree(len(clusters[0].bbox[0]), clusters)

        # search_rtree() resolves index ids through self.clusters, so the
        # clusters must be stored at the same positions used for the ids.
        self.clusters = list(clusters)
        for cid, c in enumerate(clusters):
            self.id2c[cid] = c
            self.c2id[c] = cid

        for idx, c in enumerate(clusters):
            for n in self.search_rtree(c):
                if self.c2id[n] <= idx:
                    continue  # each unordered pair is handled once
                if (c.discretes_contains(n)
                        and box_completely_contained(c.bbox, n.bbox)):
                    continue
                if not c.adjacent(n, 0.8):
                    continue
                self.graph[c].add(n)
                self.graph[n].add(c)

    def insert(self, cluster):
        """Add ``cluster`` to the graph and the index; no-op if present."""
        if cluster in self.graph:
            return

        self.graph[cluster] = set()
        for o in list(self.graph):
            if cluster == o:
                continue
            if cluster.adjacent(o, 0.8) or (
                    volume(intersection_box(cluster.bbox, o.bbox)) > 0
                    and not cluster.contains(o)):
                self.graph[cluster].add(o)
                self.graph[o].add(cluster)

        cid = len(self.clusters)
        self.clusters.append(cluster)
        self.id2c[cid] = cluster
        self.c2id[cluster] = cid
        self.insert_rtree(cid, cluster)

    def remove(self, cluster):
        """Remove ``cluster`` from the graph and the index; no-op if absent.

        NOTE(review): a bulk-loaded cluster that never gained a neighbour is
        not a key of ``self.graph`` and is therefore skipped here.
        """
        if cluster not in self.graph:
            return

        for neigh in self.graph[cluster]:
            if not neigh == cluster:
                self.graph[neigh].discard(cluster)
        del self.graph[cluster]

        cid = self.c2id[cluster]
        self.remove_rtree(cid, cluster)
        del self.c2id[cluster]
        del self.id2c[cid]
        # Keep the slot so that the ids of later clusters do not shift.
        self.clusters[cid] = None

    def neighbors(self, cluster):
        """Return the clusters adjacent to ``cluster``.

        With ``partitions_complete`` False every live cluster is returned.
        For a cluster that is a key of the graph its stored neighbour set is
        returned; otherwise neighbours are looked up in the spatial index.
        """
        if not self.partitions_complete:
            return [c for c in self.clusters if c]

        if cluster in self.graph:
            return self.graph[cluster]

        ret = set()
        for key in self.search_rtree(cluster):
            if not cluster.adjacent(key):
                continue
            if box_completely_contained(key.bbox, cluster.bbox):
                continue
            # NOTE(review): the statement that consumed ``key`` was lost in
            # the source; collecting the adjacent cluster itself -- confirm.
            ret.add(key)
        return ret
class AdjacencyVersion(object):
  """Neighbour lookup that combines per-attribute radius searches on the
  discrete features with a spatial-index search on the bounding boxes.

  Clusters live in ``self.clusters``; a cluster's position in that list is
  its id in the spatial index and in the per-attribute indexes
  (``self.disc_idxs``).  Removed clusters leave a ``None`` hole.
  """

  def __init__(self, feature_mapper):
    """
    :param feature_mapper: callable ``feature_mapper(cluster, attr)`` giving
        a feature vector; also read for its ``attrs`` and ``ranges``
    """
    self.cid = 0
    self.disc_idxs = {}
    self.feature_mapper = feature_mapper
    self.radius = .15
    self.metric = 'hamming'

    self._rtree = None  # internal datastructure
    self._ndim = None
    self.clusters = []
    self.id2c = dict()
    self.c2id = dict()

  def to_json(self):
    """Serialize the cluster bookkeeping (not the indexes) to a JSON string."""
    data = {
      'clusters': [c.__dict__ if c else None for c in self.clusters],
      'id2c': [(key, c.__dict__) for key, c in self.id2c.items()],
      'c2id': [(c.__dict__, val) for c, val in self.c2id.items()],
      'cid': self.cid,
      '_ndim': self._ndim,
      '_rtreename': 'BLAH',
    }
    return json.dumps(data)

  def from_json(self, encoded):
    """Restore the state written by ``to_json``.

    The spatial index is not serialized; ``self._rtree`` is reset to None.
    """
    data = json.loads(encoded)
    self.clusters = [Cluster.from_dict(c) if c else None
                     for c in data['clusters']]
    self.id2c = dict((key, Cluster.from_dict(val))
                     for key, val in data['id2c'])
    self.c2id = dict((Cluster.from_dict(key), val)
                     for key, val in data['c2id'])
    self.cid = data['cid']
    self._ndim = data['_ndim']
    self._rtree = None

  def setup_rtree(self, ndim, clusters=None):
    """Return the spatial index, creating it on first use.

    :param ndim: number of dimensions of the cluster bounding boxes
    :param clusters: optional clusters to bulk-load; their list position is
        used as the index id
    """
    if self._rtree is not None:
      return self._rtree

    self._ndim = ndim
    if not ndim:
      # Zero-dimensional boxes cannot be indexed: use a stand-in whose
      # query matches every cluster id.
      class k(object):
        def __init__(self, graph):
          self.graph = graph

        def insert(self, *args, **kwargs):
          pass

        def delete(self, *args, **kwargs):
          pass

        def intersection(self, *args, **kwargs):
          return range(len(self.graph.clusters))

      self._rtree = k(self)
      return self._rtree

    p = RProp()
    # The index is created with at least two dimensions; bbox_rtree() pads
    # one-dimensional boxes to match.
    p.dimension = max(2, ndim)
    p.dat_extension = 'data'
    p.idx_extension = 'index'

    if clusters:
      gen_func = ((i, self.bbox_rtree(c, enlarge=0.005), None)
                  for i, c in enumerate(clusters))
      self._rtree = RTree(gen_func, properties=p)
    else:
      # Without this ``else`` the bulk-loaded tree above was replaced by an
      # empty one.
      self._rtree = RTree(properties=p)
    return self._rtree

  def bbox_rtree(self, cluster, enlarge=0.):
    """Return ``cluster.bbox`` flattened to ``lower + higher``.

    :param enlarge: fraction of each column's ``feature_mapper.ranges``
        entry by which the box is widened on both sides
    """
    cols = cluster.cols
    lower, higher = map(list, cluster.bbox)
    if self._ndim == 1:
      # NOTE(review): pads the missing second dimension with [0, 1]; the
      # original padding values were lost -- confirm.
      lower.append(0)
      higher.append(1)

    if enlarge != 0:
      for idx, col in enumerate(cols):
        rng = enlarge * self.feature_mapper.ranges[col]
        lower[idx] -= rng
        higher[idx] += rng

    return lower + higher

  def insert_rtree(self, idx, cluster):
    """Add ``cluster`` to the spatial index under id ``idx``."""
    self.setup_rtree(len(cluster.bbox[0]))
    self._rtree.insert(idx, self.bbox_rtree(cluster))
    return cluster

  def remove_rtree(self, idx, cluster):
    """Delete ``cluster`` (id ``idx``) from the spatial index."""
    self.setup_rtree(len(cluster.bbox[0]))
    self._rtree.delete(idx, self.bbox_rtree(cluster))
    return cluster

  def search_rtree(self, cluster):
    """Return the index ids whose box intersects ``cluster``'s widened box.

    Ids, not cluster objects, are returned: ``neighbors`` intersects them
    with the ids from the discrete-attribute searches.
    """
    self.setup_rtree(len(cluster.bbox[0]))
    bbox = self.bbox_rtree(cluster, enlarge=0.01)
    return self._rtree.intersection(bbox)

  def bulk_init(self, clusters):
    """Index ``clusters`` spatially and build one radius index per attribute."""
    if not clusters:
      return

    self.setup_rtree(len(clusters[0].bbox[0]), clusters)
    self.clusters = clusters
    for cid, c in enumerate(clusters):
      self.id2c[cid] = c
      self.c2id[c] = cid

    for dim in self.feature_mapper.attrs:
      Xs = [self.feature_mapper(c, dim) for c in clusters]
      # NOTE(review): the constructor arguments and the fit call were cut
      # off in the source; rebuilt from self.radius / self.metric and from
      # the radius_neighbors() query in neighbors() -- confirm.
      idx = NearestNeighbors(radius=self.radius, metric=self.metric)
      idx.fit(Xs)
      self.disc_idxs[dim] = idx

  def contains(self, cluster):
    """Return True when ``cluster`` is currently registered."""
    return cluster in self.c2id

  def remove(self, cluster):
    """Unregister ``cluster``; return True if it was present, else False."""
    if cluster not in self.c2id:
      return False
    cid = self.c2id[cluster]
    self.remove_rtree(cid, cluster)
    del self.c2id[cluster]
    del self.id2c[cid]
    # Keep the slot so that the ids of later clusters do not shift.
    self.clusters[cid] = None
    return True

  def neighbors(self, cluster):
    """Return the live clusters that are close to ``cluster`` on every
    discrete attribute and whose box intersects its widened box.

    Returns an empty list when ``cluster`` has a discrete attribute without
    an index, or as soon as the candidate set becomes empty.
    """
    ret = None
    for name in cluster.discretes:
      if name not in self.disc_idxs:
        return []
      vect = self.feature_mapper(cluster, name)
      index = self.disc_idxs[name]
      dists, idxs = index.radius_neighbors(vect, radius=self.radius)
      idxs = set(idxs[0].tolist())

      if ret is None:
        ret = idxs
      else:
        # Candidates must match on every attribute, not only the first.
        ret.intersection_update(idxs)
      if not ret:
        return []

    idxs = set(self.search_rtree(cluster))
    if ret is None:
      ret = idxs
    else:
      ret.intersection_update(idxs)

    # Sorted so that the result order is deterministic; None entries are
    # clusters that have been removed.
    found = [self.clusters[idx] for idx in sorted(ret)]
    return [c for c in found if c]

def main(input_dir, output_dir):
    """Split scraped venues into per-city GeoJSON files plus planet files.

    Downloads the metro-extract city list with ``wget``, indexes every
    city's bounding box, then streams the venues from ``gen_venues`` and
    writes each one to:

    * ``<output_dir>/cities/<city>.geojson`` for every city whose bounding
      box contains the venue's coordinates,
    * ``<output_dir>/planet.geojson`` when it has usable coordinates,
    * ``<output_dir>/planet_addresses_only.json`` (raw properties).

    Finally ``<output_dir>/manifest.json`` is written, listing the city
    files that received at least one venue.

    :param input_dir: directory handed to ``gen_venues``
    :param output_dir: directory that receives all output files
    """
    formatter = logging.Formatter('%(asctime)s %(levelname)s [%(name)s]: %(message)s')
    handler = logging.StreamHandler(sys.stderr)
    # The formatter and handler were created but never attached.
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # NOTE(review): INFO is assumed so that the progress messages below are
    # emitted -- confirm the intended level.
    logger.setLevel(logging.INFO)

    city_names = []
    rtree = RTreeIndex()

    cities_filename = os.path.join(tempfile.gettempdir(), 'cities.json')

    subprocess.check_call(['wget', 'https://raw.githubusercontent.com/mapzen/metroextractor-cities/master/cities.json', '-O', cities_filename])

    with open(cities_filename) as cities_file:
        all_cities = json.load(cities_file)

    i = 0
    for region in all_cities['regions'].values():
        for city, data in region['cities'].items():
            bbox = data['bbox']
            rtree.insert(i, (float(bbox['left']), float(bbox['bottom']), float(bbox['right']), float(bbox['top'])))
            # Index id i must map back to the city name when querying.
            city_names.append(city)
            i += 1

    cities_dir = os.path.join(output_dir, 'cities')
    if not os.path.isdir(cities_dir):
        os.makedirs(cities_dir)

    files = {}
    planet = None
    planet_addresses_only = None
    try:
        for name in city_names:
            files[name] = open(os.path.join(cities_dir, '{}.geojson'.format(name)), 'w')

        planet = open(os.path.join(output_dir, 'planet.geojson'), 'w')
        planet_addresses_only = open(os.path.join(output_dir, 'planet_addresses_only.json'), 'w')

        i = 0
        seen = set()

        for url, canonical, venues in gen_venues(input_dir):
            domain = urlparse.urlsplit(url).netloc
            # str.strip('www.') removes any leading/trailing 'w' and '.'
            # characters; only the literal prefix should go.
            if domain.startswith('www.'):
                domain = domain[len('www.'):]

            for props in venues:
                lat = props.get('latitude')
                lon = props.get('longitude')
                props['canonical'] = canonical
                props['url'] = url
                # A missing name/street must not break the hash key.
                street = props.get('street_address') or u''
                name = props.get('name') or u''
                planet_hash = hashlib.md5(u'|'.join((name, street, str(lat), str(lon), domain)).encode('utf-8')).digest()
                address_hash = hashlib.md5(u'|'.join((name, street, domain)).encode('utf-8')).digest()
                props['guid'] = props.get('guid', random_guid())
                venue = venue_to_geojson(props)

                if lat is not None and lon is not None:
                    try:
                        lat = float(lat)
                        lon = float(lon)
                    except Exception:
                        # Unparseable coordinates: treat as not geocoded.
                        lat = None
                        lon = None

                if lat is not None and lon is not None and planet_hash not in seen:
                    line = json.dumps(venue) + '\n'
                    # A venue goes to every city whose box contains it.
                    for c in rtree.intersection((lon, lat, lon, lat)):
                        files[city_names[c]].write(line)
                    planet.write(line)
                    seen.add(planet_hash)

                if address_hash not in seen:
                    planet_addresses_only.write(json.dumps(props) + '\n')
                    seen.add(address_hash)

                i += 1
                if i % 1000 == 0 and i > 0:
                    logger.info('did {}'.format(i))

        logger.info('Creating manifest files')

        manifest_files = []

        for region in all_cities['regions'].values():
            for city, data in region['cities'].items():
                f = files[city]
                if f.tell() == 0:
                    # Nothing was written for this city: drop the file and
                    # keep it out of the manifest.
                    os.unlink(os.path.join(cities_dir, '{}.geojson'.format(city)))
                    continue

                bbox = data['bbox']
                lat = midpoint(float(bbox['top']), float(bbox['bottom']))
                lon = midpoint(float(bbox['left']), float(bbox['right']))

                manifest_files.append({'latitude': lat, 'longitude': lon, 'file': '{}.geojson'.format(city), 'name': city.replace('_', ', ').replace('-', ' ').title()})
    finally:
        for f in files.values():
            f.close()
        for f in (planet, planet_addresses_only):
            if f is not None:
                f.close()

    manifest = {'files': manifest_files}

    with open(os.path.join(output_dir, 'manifest.json'), 'w') as manifest_file:
        json.dump(manifest, manifest_file)

def create_spatial_index(shape_dict):
    """Build an ``Index`` holding the bounds of every shape in ``shape_dict``.

    Entries are numbered 0, 1, 2, ... in the dict's iteration order and each
    one carries its dict key as the stored object.
    """
    tree = Index()
    position = 0
    for label, geometry in shape_dict.iteritems():
        tree.insert(position, geometry.bounds, obj=label)
        position += 1
    return tree
 def __init__(self, *args):
     """Forward all positional arguments unchanged to ``RTreeIndex.__init__``."""
     RTreeIndex.__init__(self, *args)
 def __init__(self, *args):
     """Initialise via ``RTreeIndex.__init__``, forwarding ``*args``.

     Raises ImportError when ``base.HAS_SINDEX`` is falsy.
     """
     # Fail early with a clear message instead of failing inside the
     # base-class initialiser.
     if not base.HAS_SINDEX:
         raise ImportError("SpatialIndex needs `rtree`")
     RTreeIndex.__init__(self, *args)