示例#1
0
class GeneticLabeler(BaseGeneticLabeler):
    """Genetic labeler that scores individuals through a spatial index of all map items."""

    def __init__(self, points, bounding_box):
        BaseGeneticLabeler.__init__(self, points, bounding_box)
        self.build_index()

    def build_index(self):
        """Gather label candidates, points and border items and index their boxes.

        ``self.items`` holds the candidates first, then the points, then the
        bounding-box border items. Every item gets an ``index`` attribute equal
        to its position in ``self.items``; the same number is its id in
        ``self.idx``.
        """
        candidates = []
        for point in self.points:
            candidates += point.label_candidates

        self.items = (candidates
                      + list(self.points)
                      + list(self.bounding_box.border_config))

        self.idx = Index()
        for position, entry in enumerate(self.items):
            entry.index = position
            self.idx.insert(position, entry.box)

    def evaluate_fitness(self, individual):
        """Return the fitness of ``individual`` as a one-element tuple.

        ``individual[n]`` is the position of the label candidate chosen for
        ``self.labeled_points[n]``. The fitness is the negated sum of the
        penalties reported by ``evaluate_label`` for the chosen candidates.
        """
        # Select every chosen candidate first so that the evaluation below
        # (which runs with selected_only=True) sees the complete selection.
        for point_id, candidate_pos in enumerate(individual):
            self.labeled_points[point_id].label_candidates[candidate_pos].select()

        total = sum(
            evaluate_label(
                self.labeled_points[point_id].label_candidates[candidate_pos],
                self.items,
                self.idx,
                selected_only=True,
            )
            for point_id, candidate_pos in enumerate(individual)
        )
        return (-total,)
示例#2
0
 def read_airspace(self, airspace):
     """Load an OpenAir file into a spatial index of the zones that can be checked.

     Records the reader flags as erroneous are logged and skipped. A zone with
     a ground-relative floor or ceiling is skipped (with a warning) when
     ``self.agl_validable`` is false. Zones without bounds are not indexed.
     A ``KeyError`` raised while handling a record is logged and the record is
     dropped.

     :param airspace: path of the OpenAir file
     :return: the populated index; each entry stores the ``Airspace`` object
     """
     zones = Index()
     with open(airspace, 'r') as stream:
         reader = openair.Reader(stream)
         for record, error in reader:
             if error:
                 logging.warning(
                     f'line {error.lineno} of {os.path.basename(airspace)} - {error}'
                 )
                 continue

             try:
                 zone = Airspace(record)
                 unverifiable = not self.agl_validable and (
                     zone.ground_floor or zone.ground_ceiling)
                 if unverifiable:
                     logging.warning(
                         f'{zone.name} will not be checked because ground altitude of flight could not be retrieved.'
                     )
                 elif zone.bounds:
                     zones.insert(id(zone), zone.bounds, obj=zone)
             except KeyError:
                 logging.warning(
                     f'line {reader.reader.lineno} of {os.path.basename(airspace)} - error in previous record'
                 )
     return zones
示例#3
0
文件: test_tpr.py 项目: sthagen/rtree
    def test_tpr(self):
        """Replay generated operations on a TPR-tree and compare queries with brute force."""
        # TODO : this freezes forever on some windows cloud builds
        if os.name == 'nt':
            return

        # Every object currently in the tree, keyed by id, for the brute-force check
        live_objects = {}
        tree = Index(properties=Property(type=RT_TPRTree))

        for operation, t_now, object_ in data_generator():
            if operation == "INSERT":
                tree.insert(object_.id, object_.get_coordinates())
                live_objects[object_.id] = object_
                continue

            if operation == "DELETE":
                tree.delete(object_.id, object_.get_coordinates(t_now))
                del live_objects[object_.id]
                continue

            if operation != "QUERY":
                continue

            found = set(tree.intersection(object_.get_coordinates()))

            # Brute force: test each live object's path over the query
            # interval against the query rectangle
            expected = set()
            for candidate in live_objects.values():
                start_x, start_y = candidate.getXY(object_.start_time)
                end_x, end_y = candidate.getXY(object_.end_time)
                path = (start_x, start_y, end_x, end_y)
                window = (object_.x, object_.y, object_.dx, object_.dy)
                if intersects(*path, *window):
                    expected.add(candidate.id)

            # Tree should match brute force approach
            assert found == expected
示例#4
0
def create_spatial_index(shape_dict):
    """Build a spatial index over the bounds of every shape in ``shape_dict``.

    Progress is reported on stderr as ``Making spatial index... done.``.

    :param shape_dict: mapping of block id to shape; each shape must expose
        a ``bounds`` attribute
    :return: index whose entries are numbered in iteration order and store the
        block id as their object
    """
    # sys.stderr.write replaces the Python 2-only ``print >> stream`` form,
    # which raises TypeError on Python 3; the emitted text is unchanged.
    sys.stderr.write('Making spatial index... ')
    spatial_index = Index()
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only)
    for index, (blockid, shape) in enumerate(shape_dict.items()):
        spatial_index.insert(index, shape.bounds, obj=blockid)
    sys.stderr.write('done.\n')
    return spatial_index
示例#5
0
def create_spatial_index(shape_dict):
    """Build a spatial index over the bounds of every shape in ``shape_dict``.

    Progress is reported on stderr as ``Making spatial index... done.``.

    :param shape_dict: mapping of block id to shape; each shape must expose
        a ``bounds`` attribute
    :return: index whose entries are numbered in iteration order and store the
        block id as their object
    """
    # sys.stderr.write replaces the Python 2-only ``print >> stream`` form,
    # which raises TypeError on Python 3; the emitted text is unchanged.
    sys.stderr.write('Making spatial index... ')
    spatial_index = Index()
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only)
    for index, (blockid, shape) in enumerate(shape_dict.items()):
        spatial_index.insert(index, shape.bounds, obj=blockid)
    sys.stderr.write('done.\n')
    return spatial_index
示例#6
0
def demo_delete():
    """Insert country polygons into an index, delete one country, and report lost matches.

    One random point is generated per polygon. Before the deletion every point
    must hit an index entry of its own country (asserted). After the deletion
    the same check is repeated and every point without such a hit is printed
    as ``<point> - <country name>``.
    """
    seed = 1  # Seed for random points

    countries = get_countries()

    target_position = 170  # United States of America
    removed_ids = []  # Index ids of the target country's polygons

    properties = Property()
    # Optional tuning, left at the defaults:
    #   properties.writethrough = True
    #   properties.leaf_capacity = 1000
    #   properties.fill_factor = 0.5
    index = Index(properties=properties)

    points_per_polygon = 1
    samples = []  # (country name, random points) per inserted polygon

    def hits_own_country(name, pt):
        """Tell whether any index entry intersecting ``pt`` stores ``name``."""
        hits = list(index.intersection(pt.bounds, objects=True))
        return any(hit.object == name for hit in hits)

    # Insert every polygon of every country under a fresh id
    for position, (country_name, geometry) in enumerate(countries):
        for polygon in get_polygons(geometry):
            entry_id = uuid.uuid1().int
            index.insert(entry_id, polygon.bounds, country_name)

            if position == target_position:
                # Remember the id so the polygon can be deleted later
                removed_ids.append(entry_id)

            samples.append(
                (country_name,
                 gen_random_point(points_per_polygon, polygon, seed)))

    # Before the deletion every sampled point must be matched
    for country_name, country_points in samples:
        for point in country_points:
            assert hits_own_country(country_name, point)

    # Delete the target country's polygons, in insertion order
    target_geometry = countries[target_position][1]
    for slot, polygon in enumerate(get_polygons(target_geometry)):
        index.delete(removed_ids[slot], polygon.bounds)

    # Repeat the check and collect the points that no longer match
    points_missing = [
        str(point) + " - " + country_name
        for country_name, country_points in samples
        for point in country_points
        if not hits_own_country(country_name, point)
    ]

    for entry in points_missing:
        print(entry)
示例#7
0
文件: gis.py 项目: willeforce/atlite
def compute_indicatormatrix(orig,
                            dest,
                            orig_proj='latlong',
                            dest_proj='latlong'):
    """
    Compute the indicatormatrix

    The indicatormatrix I[i,j] is a sparse representation of the ratio
    of the area in orig[j] lying in dest[i], where orig and dest are
    collections of polygons, i.e.

        I[i,j] = area(intersection of dest[i] and orig[j]) / area(orig[j])

    A value of I[i,j] = 1 indicates that the shape orig[j] is fully
    contained in shape dest[i].

    Before the overlaps are computed, dest is passed through
    ``reproject_shapes(dest, dest_proj, orig_proj)``.

    Parameters
    ---------
    orig : Collection of shapely polygons
    dest : Collection of shapely polygons
    orig_proj : projection of orig, forwarded to ``reproject_shapes``
    dest_proj : projection of dest, forwarded to ``reproject_shapes``

    Returns
    -------
    I : sp.sparse.lil_matrix
      Indicatormatrix of shape (len(dest), len(orig))
    """

    dest = reproject_shapes(dest, dest_proj, orig_proj)
    # Builtin float: the ``np.float`` alias was removed in NumPy 1.24
    indicator = sp.sparse.lil_matrix((len(dest), len(orig)), dtype=float)

    # Only the import is guarded, so an ImportError raised by the actual
    # computation is not mistaken for a missing rtree.
    try:
        from rtree.index import Index
    except ImportError:
        logger.warning(
            "Rtree is not available. Falling back to slower algorithm.")

        # Slow path: test every (dest, orig) pair
        dest_prepped = list(map(prep, dest))

        for i, j in product(range(len(dest)), range(len(orig))):
            if dest_prepped[i].intersects(orig[j]):
                area = dest[i].intersection(orig[j]).area
                indicator[i, j] = area / orig[j].area
    else:
        # Fast path: only look at pairs whose bounding boxes intersect
        idx = Index()
        for j, o in enumerate(orig):
            idx.insert(j, o.bounds)

        for i, d in enumerate(dest):
            for j in idx.intersection(d.bounds):
                o = orig[j]
                area = d.intersection(o).area
                indicator[i, j] = area / o.area

    return indicator
示例#8
0
def get_rtree(geometries, fp):
    """Return a file-backed index over ``geometries``, creating it if missing.

    The index is built only when ``<fp>.idx`` does not exist yet; entry ids
    are the positions of the geometries and the indexed boxes are their
    ``bounds``.

    :param geometries: iterable of objects exposing ``bounds``
    :param fp: path object (must provide ``as_posix()``) used as index base name
    :return: dict with the opened index under ``'rtree'`` and the input under
        ``'geometries'``
    """
    base_path = fp.as_posix()
    index_file = base_path + '.idx'

    if not os.path.exists(index_file):
        # Populate R-tree index with bounds of geometries
        print('Populate {} tree'.format(base_path))
        builder = Index(base_path)
        for position, geometry in enumerate(geometries):
            builder.insert(position, geometry.bounds)
        builder.close()

    opened = Index(base_path)
    return {'rtree': opened, 'geometries': geometries}
示例#9
0
class Mesh2D:
    """!
    The general representation of mesh in Serafin 2D.
    The basis for interpolation, volume calculations etc.
    """
    def __init__(self,
                 input_header,
                 construct_index=False,
                 iter_pbar=lambda x, **kwargs: x):
        """!
        @param input_header <slf.Serafin.SerafinHeader>: input Serafin header
        @param construct_index <bool>: perform the index construction
        @param iter_pbar: iterable progress bar, called as
            iter_pbar(iterable, unit='elements'); the default passes the
            iterable through and ignores keyword arguments
        """
        nb_nodes = input_header.nb_nodes_2d
        self.x = input_header.x[:nb_nodes]
        self.y = input_header.y[:nb_nodes]
        self.ikle = input_header.ikle_2d - 1  # back to 0-based indexing
        self.triangles = {}
        self.nb_points = self.x.shape[0]
        self.nb_triangles = self.ikle.shape[0]
        self.points = np.stack([self.x, self.y], axis=1)
        if construct_index:
            self._construct_index(iter_pbar)
        else:
            # Empty index; triangles are not registered
            self.index = Index()

    def _construct_index(self, iter_pbar):
        """!
        Separate the index construction from the constructor, allowing a GUI override
        @param iter_pbar: iterable progress bar, called with the keyword argument unit='elements'
        """
        self.index = Index()
        for i, j, k in iter_pbar(self.ikle, unit='elements'):
            t = Polygon([self.points[i], self.points[j], self.points[k]])
            self.triangles[i, j, k] = t
            # The entry id is the triangle's first node number, so it is not
            # unique per triangle; lookups use the stored (i, j, k) object.
            self.index.insert(i, t.bounds, obj=(i, j, k))

    def get_intersecting_elements(self, bounding_box):
        """!
        @brief Return the triangles in the mesh intersecting the bounding box
        @param bounding_box <tuple>: (left, bottom, right, top) of a 2d geometrical object
        @return <[tuple]>: The list of triangles (i,j,k) intersecting the bounding box
        """
        return list(self.index.intersection(bounding_box, objects='raw'))
示例#10
0
    def create_rtree(self, clusters):
        """Build a spatial index over the clusters' bounding boxes.

        Each ``bbox`` is read as a pair of tuples (``bbox[0]`` and ``bbox[1]``,
        presumably the lower and upper corner). One extra dimension spanning
        0..1 is appended to every box, so the index has
        ``len(bbox[0]) + 1`` dimensions.

        When the first cluster's box has no dimensions at all, no index is
        built; the returned stand-in reports every cluster position for any
        ``intersection`` query.

        :param clusters: non-empty sequence of objects exposing ``bbox``
        :return: object with an ``intersection`` method yielding cluster positions
        """
        if not len(clusters[0].bbox[0]):

            class _AllClusters(object):
                """Stand-in index that matches every cluster."""

                def intersection(self, foo):
                    # range instead of the Python 2-only xrange, which is a
                    # NameError on Python 3
                    return range(len(clusters))

            return _AllClusters()

        ndim = len(clusters[0].bbox[0]) + 1
        p = RProp()
        p.dimension = ndim
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        rtree = RTree(properties=p)
        for idx, c in enumerate(clusters):
            # Coordinates are all first-corner values, then all second-corner
            # values, each padded with the dummy axis.
            rtree.insert(idx, c.bbox[0] + (0,) + c.bbox[1] + (1,))
        return rtree
示例#11
0
    def create_rtree(self, clusters):
        """Build a spatial index over the clusters' bounding boxes.

        Each ``bbox`` is read as a pair of tuples (``bbox[0]`` and ``bbox[1]``,
        presumably the lower and upper corner). One extra dimension spanning
        0..1 is appended to every box, so the index has
        ``len(bbox[0]) + 1`` dimensions.

        When the first cluster's box has no dimensions at all, no index is
        built; the returned stand-in reports every cluster position for any
        ``intersection`` query.

        :param clusters: non-empty sequence of objects exposing ``bbox``
        :return: object with an ``intersection`` method yielding cluster positions
        """
        if not len(clusters[0].bbox[0]):

            class _AllClusters(object):
                """Stand-in index that matches every cluster."""

                def intersection(self, foo):
                    # range instead of the Python 2-only xrange, which is a
                    # NameError on Python 3
                    return range(len(clusters))

            return _AllClusters()

        ndim = len(clusters[0].bbox[0]) + 1
        p = RProp()
        p.dimension = ndim
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        rtree = RTree(properties=p)
        for idx, c in enumerate(clusters):
            # Coordinates are all first-corner values, then all second-corner
            # values, each padded with the dummy axis.
            rtree.insert(idx, c.bbox[0] + (0, ) + c.bbox[1] + (1, ))
        return rtree
示例#12
0
    def construct_rtree(self, clusters):
        """Build a spatial index over the clusters' bounding boxes.

        The index dimension is ``max(2, len(clusters[0].centroid))``. When the
        first cluster's box has no dimensions at all, no index is built; the
        returned stand-in reports every cluster position for any
        ``intersection`` query.

        :param clusters: non-empty sequence of objects exposing ``bbox`` and
            ``centroid``
        :return: object with an ``intersection`` method yielding cluster positions
        """
        if not len(clusters[0].bbox[0]):
            class _AllClusters(object):
                """Stand-in index that matches every cluster."""
                def intersection(self, foo):
                    # range instead of the Python 2-only xrange, which is a
                    # NameError on Python 3
                    return range(len(clusters))
            return _AllClusters()
        ndim = max(2, len(clusters[0].centroid))
        p = RProp()
        p.dimension = ndim
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        rtree = RTree(properties=p)
        for idx, c in enumerate(clusters):
            box = c.bbox
            # NOTE(review): ndim is at least 2 (see max() above), so this
            # padding branch never runs and one-dimensional boxes are
            # inserted unpadded by the else branch. Left as is because the
            # query side is not visible here; confirm the intent before
            # enabling it.
            if ndim == 1:
                rtree.insert(idx, box[0] + [0] + box[1] + [1])
            else:
                rtree.insert(idx, box[0] + box[1])
        return rtree
示例#13
0
def snap_to_edge_position(gdf, points, k=3, rtree=None):
    """
    Snap given points in the plane to edges in GeoDataFrame of edges.

    Parameters
    ----------
    gdf : GeoDataframe
        The edges of spatial network as a Geodataframe.
    points : array of floats, shape (M, 2)
        The cartesian coordinates of the points to be snapped.
    k : integer, optional
        Number of nearest edges to consider.
    rtree : spatial index, optional
        Pre-built index whose entry ids are the positions (as used by
        ``iloc``) of the edges in ``gdf``. Built from the bounds of the edge
        geometries when omitted.

    Returns
    -------
    nearest_edges : list, length M
        Index labels (as returned by ``idxmin``) of the nearest edges in the
        GeoDataframe.
    refdistances : list of floats, length M
        Linear referencing distances of points along nearest edge.
    """
    X, Y = points.T
    geom = gdf["geometry"]

    # If not passed, build the r-tree spatial index by position for subsequent iloc
    if rtree is None:
        rtree = RTreeIndex()
        for pos, bounds in enumerate(geom.bounds.values):
            rtree.insert(pos, bounds)

    # use r-tree to find possible nearest neighbors, one point at a time,
    # then minimize euclidean distance from point to the possible matches
    nearest_edges = []
    refdistances = []
    for xy in zip(X, Y):
        p = Point(xy)
        candidates = list(rtree.nearest(xy, num_results=k))
        dists = geom.iloc[candidates].distance(p)
        # idxmin yields the label of the closest candidate; compute it once
        nearest = dists.idxmin()
        nearest_edges.append(nearest)
        refdistances.append(geom[nearest].project(p))

    return nearest_edges, refdistances
示例#14
0
def local_search(points, bounding_box, iterations):
    """Greedily re-place the label of every labeled point for a number of passes.

    In each pass, and for each point with text, every label candidate is
    scored as ``POSITION_WEIGHT * position`` plus its overlap with all indexed
    items whose box intersects the candidate (items belonging to the same
    point are ignored). The candidate with the lowest score is selected and
    replaces the point's previous label in the index.

    :param points: all points; those with a truthy ``text`` get a label
    :param bounding_box: object exposing ``border_config`` (border items)
    :param iterations: number of passes over the labeled points
    :return: None; labels are changed through ``select()`` on the candidates
    """
    labeled_points = [p for p in points if p.text]

    items = [p.label for p in labeled_points]
    items.extend(points)
    items.extend(bounding_box.border_config)

    idx = Index()
    for i, item in enumerate(items):
        item.index = i
        idx.insert(item.index, item.box)

    for _ in range(iterations):
        for lp in labeled_points:
            best_candidate = None
            min_penalty = None
            for lc1 in lp.label_candidates:
                penalty = POSITION_WEIGHT * lc1.position

                # Check overlap with other labels and points
                for item_id in idx.intersection(lc1.box):
                    item = items[item_id]
                    if hasattr(item, "point") and lc1.point == item.point:
                        continue
                    penalty += item.overlap(lc1)

                if min_penalty is None or penalty < min_penalty:
                    min_penalty = penalty
                    best_candidate = lc1

            if best_candidate is None:
                # No candidates for this point: keep its current label indexed
                continue

            # Remove the old label from the index
            idx.delete(lp.label.index, lp.label.box)

            # Select the new label
            best_candidate.select()

            # Add the new label to the index and item list. Its ``index`` must
            # be updated to the new id, otherwise the delete in the next pass
            # would use a stale (or missing) id and leave the entry behind.
            new_label = lp.label
            new_label.index = len(items)
            idx.insert(new_label.index, new_label.box)
            items.append(new_label)
示例#15
0
    def build_cache(self):
        """Precompute the penalties of every label candidate.

        For each candidate ``lc`` this sets:

        * ``lc.penalty``: ``POSITION_WEIGHT * lc.position`` plus the overlap
          with every intersecting non-label item (the candidate's own point is
          skipped and at most one bounding-box border is counted);
        * ``lc.label_penalties``: a list indexed by candidate ``index`` holding
          the overlap with each intersecting candidate of another point.
        """
        label_candidates = [
            candidate
            for point in self.points
            for candidate in point.label_candidates
        ]
        items = (label_candidates
                 + list(self.points)
                 + list(self.bounding_box.border_config))

        idx = Index()
        for position, entry in enumerate(items):
            entry.index = position
            idx.insert(position, entry.box)

        candidate_count = len(label_candidates)
        for lc in label_candidates:
            lc.penalty = POSITION_WEIGHT * lc.position
            lc.label_penalties = [0] * candidate_count
            border_seen = False

            for item_id in idx.intersection(lc.box):
                item = items[item_id]

                # The candidate itself and its own point never count
                if item == lc or item == lc.point:
                    continue

                if isinstance(item, Label):
                    # Other candidates go to the pairwise table, except
                    # the sibling candidates of the same point
                    if not lc.point == item.point:
                        lc.label_penalties[item.index] = item.overlap(lc)
                    continue

                if isinstance(item, BoundingBoxBorder):
                    # Count only the first border that is hit
                    if border_seen:
                        continue
                    border_seen = True

                lc.penalty += item.overlap(lc)
示例#16
0
class PolyStore(object):
    """Point lookup over the shapes of a shapefile, backed by a spatial index."""

    def __init__(self):
        self.index = Index()

    def load_from_shapefile(self, sf):
        """Read shapes and records from ``sf`` and index every shape's ``bbox``.

        The record and part count of every multi-part shape are printed.

        :param sf: object providing ``shapes()`` and ``records()``
        """
        self.shapes = sf.shapes()
        self.records = sf.records()
        for index, shape in enumerate(self.shapes):
            if len(shape.parts) > 1:
                # Single-argument print() gives the same output on Python 2
                # and 3; the old print statement is a syntax error on 3.
                print('%s %s' % (self.records[index], len(shape.parts)))
            self.index.insert(index, shape.bbox)

    def get_shape_at_point(self, point):
        """Return field 4 of the record whose shape contains ``point``.

        :param point: ``(x, y)`` pair (a plain parameter; tuple parameters
            were removed in Python 3)
        :return: ``'0'`` when the containing part's position is listed in
            ``SHAPE_LAND`` for that record value, otherwise the record value;
            ``None`` when no shape contains the point
        """
        x, y = point
        candidates = self.index.intersection((x, y, x, y))
        for candidate in candidates:
            shape = self.shapes[candidate]
            for i, part in enumerate(shape_to_parts_list(shape)):
                if Polygon(part).contains(Point(x, y)):
                    code = self.records[candidate][4]
                    if i in SHAPE_LAND.get(code, []):
                        return '0'
                    return code
        return None
示例#17
0
class PolyStore(object):
    """Point lookup over the shapes of a shapefile, backed by a spatial index."""

    def __init__(self):
        self.index = Index()

    def load_from_shapefile(self, sf):
        """Read shapes and records from ``sf`` and index every shape's ``bbox``.

        The record and part count of every multi-part shape are printed.

        :param sf: object providing ``shapes()`` and ``records()``
        """
        self.shapes = sf.shapes()
        self.records = sf.records()
        for index, shape in enumerate(self.shapes):
            if len(shape.parts) > 1:
                # Single-argument print() gives the same output on Python 2
                # and 3; the old print statement is a syntax error on 3.
                print('%s %s' % (self.records[index], len(shape.parts)))
            self.index.insert(index, shape.bbox)

    def get_shape_at_point(self, point):
        """Return field 4 of the record whose shape contains ``point``.

        :param point: ``(x, y)`` pair (a plain parameter; tuple parameters
            were removed in Python 3)
        :return: ``'0'`` when the containing part's position is listed in
            ``SHAPE_LAND`` for that record value, otherwise the record value;
            ``None`` when no shape contains the point
        """
        x, y = point
        candidates = self.index.intersection((x, y, x, y))
        for candidate in candidates:
            shape = self.shapes[candidate]
            for i, part in enumerate(shape_to_parts_list(shape)):
                if Polygon(part).contains(Point(x, y)):
                    code = self.records[candidate][4]
                    if i in SHAPE_LAND.get(code, []):
                        return '0'
                    return code
        return None
示例#18
0
def evaluate_labels(labels, points, bounding_box):
    """Return the penalty of every label and print the elapsed time.

    All labels, points and bounding-box border items are put into one spatial
    index (each item's ``index`` attribute is set to its id), then each label
    is scored with ``evaluate_label``.

    :param labels: labels to evaluate
    :param points: points the labels may overlap
    :param bounding_box: object exposing ``border_config`` (border items)
    :return: list of penalties, one per label, in the order of ``labels``
    """
    items = []
    items.extend(labels)
    items.extend(points)
    items.extend(bounding_box.border_config)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    started = time.perf_counter()
    idx = Index()

    for i, item in enumerate(items):
        item.index = i
        idx.insert(i, item.box)

    # Update penalties for overlap with other objects
    penalties = [evaluate_label(l, items, idx) for l in labels]

    finished = time.perf_counter()

    print(f"Total time: {finished - started}")
    return penalties
示例#19
0
class StreetIndex(object):
    """Spatial lookup of streets read from a file with one JSON feature per line.

    ``idx`` holds the first and last coordinate of every street segment
    (start points under a positive id, end points under the negated id);
    ``bb_idx`` holds the bounding box of every street under its street id.
    """

    # Factor used to pack a segment number and a street id into one index id;
    # street ids must stay below this value for the packing to be reversible.
    _SEGMENT_STRIDE = 1000000

    def __init__(self, streets_file):
        """Read ``streets_file`` once and fill both indexes.

        :param streets_file: path of a file with one JSON feature per line;
            each feature needs ``properties.id`` and a multi-part ``geometry``
        """
        self.idx = Index()
        self.bb_idx = Index()
        with open(streets_file) as f:
            for line in f:
                street = json.loads(line)
                street_id = street['properties']['id']
                street_shape = asShape(street['geometry'])
                for i, segment in enumerate(street_shape.geoms):
                    seg_id = self.encode_seg_id(i, street_id)
                    self.idx.insert(seg_id, segment.coords[0])
                    self.idx.insert(-seg_id, segment.coords[-1])
                self.bb_idx.insert(int(street_id), list(street_shape.bounds))

    def encode_seg_id(self, i, street_id):
        """Pack segment number ``i`` and ``street_id`` into a single integer id."""
        return i * self._SEGMENT_STRIDE + int(street_id)

    def decode_seg_id(self, seg_id):
        """Return the street id packed into ``seg_id`` (the sign is ignored).

        The street id is the remainder after removing the segment part; the
        previous ``abs(seg_id) - i`` only worked for segment 0 and produced a
        float under Python 3 division.
        """
        return abs(seg_id) % self._SEGMENT_STRIDE

    def find_nearest_street(self, shape):
        """Return, as a string, the id of the street nearest to ``shape``.

        Polygons and multi-polygons are represented by their centroid, any
        other geometry by its first coordinate.

        :param shape: feature dict with a ``geometry`` entry
        """
        shape = asShape(shape['geometry'])
        if shape.geom_type in ('Polygon', 'MultiPolygon'):
            coords = shape.centroid.coords
        else:
            coords = shape.coords
        ref_point = (float(coords.xy[0][0]), float(coords.xy[1][0]))
        street_id = list(self.bb_idx.nearest(ref_point))[0]
        return str(street_id)

    def find_connected_street(self, street):
        """Return the ids of the other streets touching either end of ``street``.

        :param street: feature dict with ``properties.id`` and a multi-part
            ``geometry``
        :return: set of integer street ids, without the street itself
        """
        street_id = int(street['properties']['id'])
        street_shape = asShape(street['geometry'])
        street_start = street_shape.geoms[0].coords[0]
        street_end = street_shape.geoms[-1].coords[-1]
        seg_ids = list(self.idx.intersection(street_start))
        seg_ids += list(self.idx.intersection(street_end))
        street_ids = {self.decode_seg_id(seg_id) for seg_id in seg_ids}
        street_ids.discard(street_id)
        return street_ids
示例#20
0
class DyClee:
    """
    Implementation roughly as per https://doi.org/10.1016/j.patcog.2019.05.024.

    Microclusters live in one of four partitions: dense, semi-dense, outlier
    or long-term memory. ``step`` runs the distance stage for one element and,
    at the intervals configured on the context, the density partitioning and
    the density (clustering) stage.
    """
    def __init__(self, context: DyCleeContext):
        self.context = context

        self.dense_µclusters: Set[MicroCluster] = Set()
        self.semidense_µclusters: Set[MicroCluster] = Set()
        self.outlier_µclusters: Set[MicroCluster] = Set()
        self.long_term_memory: Set[MicroCluster] = Set()
        self.eliminated: Set[MicroCluster] = Set()

        self.next_µcluster_index: int = 0
        self.next_class_label: int = 0
        self.n_steps: int = 0
        self.last_partitioning_step: int = 0
        self.last_density_step: int = 0

        if self.context.maintain_rtree:
            p = RTreeProperty(dimension=self.context.n_features)
            self.rtree = RTreeIndex(properties=p)
            # This mapping is used to retrieve microcluster objects from their hashes
            # stored with their locations in the R*-tree
            self.µcluster_map: Optional[dict[int, MicroCluster]] = {}
        else:
            self.rtree = None
            self.µcluster_map = None

    @property
    def active_µclusters(self) -> Set[MicroCluster]:
        """Union of the dense and semi-dense microclusters."""
        return self.dense_µclusters | self.semidense_µclusters

    @property
    def all_µclusters(self) -> Set[MicroCluster]:
        """Union of the active, outlier and long-term memory microclusters."""
        return self.active_µclusters | self.outlier_µclusters | self.long_term_memory

    def get_next_µcluster_index(self) -> int:
        """Return the next unused microcluster index and advance the counter."""
        index = self.next_µcluster_index
        self.next_µcluster_index += 1
        return index

    def get_next_class_label(self) -> int:
        """Return the next unused class label and advance the counter."""
        label = self.next_class_label
        self.next_class_label += 1
        return label

    def update_density_partitions(self, time: Timestamp) -> Set[MicroCluster]:
        """Re-assign every microcluster to a partition from its density at ``time``.

        The thresholds are the mean and the median density over all
        microclusters. A microcluster that fits none of the partitions is
        eliminated and, if the R*-tree is maintained, removed from it.
        Eliminated microclusters are remembered in ``self.eliminated`` only
        when ``context.store_elements`` is set.

        :return: the microclusters eliminated by this call
        """
        densities = np.array(
            [µcluster.density(time) for µcluster in self.all_µclusters])
        mean_density = np.mean(densities)
        median_density = np.median(densities)

        dense: Set[MicroCluster] = Set()
        semidense: Set[MicroCluster] = Set()
        outliers: Set[MicroCluster] = Set()
        memory: Set[MicroCluster] = Set()
        eliminated: Set[MicroCluster] = Set()

        for µcluster in self.all_µclusters:
            density = µcluster.density(time)

            if mean_density <= density >= median_density:
                # Any may become dense
                dense.add(µcluster)
                µcluster.once_dense = True
            elif (µcluster in self.dense_µclusters
                  or µcluster in self.semidense_µclusters
                  or µcluster in self.outlier_µclusters) and (
                      density >= mean_density) != (density >= median_density):
                # Dense and outliers may become dense
                # Semi-dense may stay semi-dense
                semidense.add(µcluster)
            elif ((µcluster in self.dense_µclusters
                   or µcluster in self.semidense_µclusters)
                  and mean_density > density < median_density) or (
                      µcluster in self.outlier_µclusters
                      and density >= self.context.elimination_threshold):
                # Dense and semi-dense may become outliers
                # Outliers may stay outliers
                outliers.add(µcluster)
            elif (self.context.long_term_memory
                  and µcluster in self.outlier_µclusters
                  and µcluster.once_dense):
                # Outliers may be put into long-term memory
                memory.add(µcluster)
            else:
                # If none of the conditions are met, the microcluster is eliminated
                eliminated.add(µcluster)

                if self.context.maintain_rtree:
                    # Remove microcluster from R*-tree
                    self.rtree.delete(hash(µcluster), µcluster.bounding_box)

        # Store the final sets, sorting by index for predictable ordering
        self.dense_µclusters = Set(sorted(dense, key=lambda µ: µ.index))
        self.semidense_µclusters = Set(sorted(semidense,
                                              key=lambda µ: µ.index))
        self.outlier_µclusters = Set(sorted(outliers, key=lambda µ: µ.index))
        self.long_term_memory = Set(sorted(memory, key=lambda µ: µ.index))

        if self.context.store_elements:
            # Keep track of eliminated microclusters (to not lose elements)
            self.eliminated |= eliminated

        return eliminated

    def _create_outlier_µcluster(self, element: Element,
                                 time: Timestamp) -> MicroCluster:
        """Create a microcluster from ``element``, file it as an outlier and index it."""
        µcluster = MicroCluster(element,
                                time,
                                context=self.context,
                                index=self.get_next_µcluster_index())
        self.outlier_µclusters.add(µcluster)

        if self.context.maintain_rtree:
            # Add microcluster to R*-tree
            self.µcluster_map[hash(µcluster)] = µcluster
            self.rtree.insert(hash(µcluster), µcluster.bounding_box)

        return µcluster

    @staticmethod
    def _is_better_match(µcluster: MicroCluster, dist,
                         closest: Optional[MicroCluster], min_dist,
                         time: Timestamp) -> bool:
        """Tell whether ``µcluster`` at distance ``dist`` beats the current best.

        A smaller distance wins; higher density is the tie-breaker in case of
        equal distances.
        """
        return (closest is None or dist < min_dist
                or (dist == min_dist
                    and µcluster.density(time) > closest.density(time)))

    def distance_step(self, element: Element, time: Timestamp) -> MicroCluster:
        """Add ``element`` to the closest matching microcluster, or start a new one.

        The partitions are searched in priority order (active, then outliers,
        then long-term memory) and the search stops at the first partition
        that yields a match. When nothing matches, a new outlier microcluster
        is created from the element.

        :return: the microcluster that received the element
        """
        if self.context.update_ranges:
            self.context.update_feature_ranges(element)

        if not self.all_µclusters:
            return self._create_outlier_µcluster(element, time)

        closest: Optional[MicroCluster] = None

        if self.context.distance_index == SpatialIndexMethod.RTREE:
            # The R*-tree searches all microclusters regardless of precedence, so we
            # need to filter by priority after the index search

            # Find all reachable microclusters
            matches: Set[MicroCluster] = Set([
                self.µcluster_map[hash_]
                for hash_ in self.rtree.intersection((*element, *element))
            ])

            min_dist = None

            for candidate_µclusters in (self.active_µclusters,
                                        self.outlier_µclusters,
                                        self.long_term_memory):
                # First match active microclusters, then others

                for µcluster in matches & candidate_µclusters:
                    dist = µcluster.distance(element)

                    if self._is_better_match(µcluster, dist, closest,
                                             min_dist, time):
                        closest = µcluster
                        min_dist = dist

                if closest is not None:
                    # Match found: a lower-priority partition must not
                    # override it (same rule as the other search methods)
                    break
        else:
            for candidate_µclusters in (self.active_µclusters,
                                        self.outlier_µclusters,
                                        self.long_term_memory):
                # First search actives, then others for reachable microclusters

                if not candidate_µclusters:
                    continue

                if self.context.distance_index == SpatialIndexMethod.KDTREE:
                    # Ensure predictable order for indexability
                    candidate_µclusters = list(candidate_µclusters)

                    # vstack is the non-deprecated spelling of row_stack
                    candidate_centroids: np.ndarray = np.vstack([
                        µcluster.centroid
                        for µcluster in candidate_µclusters
                    ])

                    # Find potentially reachable microclusters (using L-inf norm)
                    idcs, = KDTree(
                        candidate_centroids, p=np.inf).query_radius(
                            np.reshape(element, (1, -1)),
                            self.context.potentially_reachable_radius)

                    if not len(idcs):
                        continue

                    to_check = [candidate_µclusters[i] for i in idcs]
                else:
                    # Brute force
                    to_check = candidate_µclusters

                min_dist = None

                # Find closest microcluster among the reachable ones
                for µcluster in to_check:
                    if not µcluster.is_reachable(element):
                        continue

                    dist = µcluster.distance(element)

                    if self._is_better_match(µcluster, dist, closest,
                                             min_dist, time):
                        closest = µcluster
                        min_dist = dist

                if closest is not None:
                    # Match found, no need to check next set
                    break

        if closest is None:
            return self._create_outlier_µcluster(element, time)

        if self.context.maintain_rtree:
            # Remove microcluster from R*-tree (its bounding box is re-read
            # after the element has been added)
            self.rtree.delete(hash(closest), closest.bounding_box)

        # Add element to closest microcluster
        closest.add(element, time)

        if self.context.maintain_rtree:
            # Add modified microcluster to R*-tree
            self.rtree.insert(hash(closest), closest.bounding_box)

        return closest

    def global_density_step(
            self, time: Timestamp) -> tuple[list[Cluster], Set[MicroCluster]]:
        """Group connected microclusters into clusters.

        Every not-yet-visited dense microcluster seeds a cluster (receiving a
        fresh label if it has none). The cluster grows over directly connected
        dense and semi-dense neighbours; semi-dense neighbours join but are
        not expanded further.

        :return: the clusters and the microclusters that belong to none of them
        """
        clusters: list[Cluster] = []
        seen: Set[MicroCluster] = Set()

        for µcluster in self.dense_µclusters:
            if µcluster in seen:
                continue

            seen.add(µcluster)

            if µcluster.label is None:
                µcluster.label = self.get_next_class_label()

            cluster = Cluster(µcluster, time)
            clusters.append(cluster)

            # Get dense and semi-dense directly connected neighbours
            connected = µcluster.get_neighbours(
                (self.dense_µclusters | self.semidense_µclusters) - seen,
                rtree_index=self.rtree,
                µcluster_map=self.µcluster_map)

            while connected:
                neighbour = connected.pop()

                if neighbour in seen:
                    continue

                seen.add(neighbour)

                # Outlier microclusters are ignored
                if neighbour in self.outlier_µclusters:
                    continue

                # Dense and semi-dense microclusters become part of the cluster
                neighbour.label = µcluster.label
                cluster.add(neighbour, time)

                # Semi-dense neighbours may only form the boundary
                if neighbour not in self.dense_µclusters:
                    continue

                # Get neighbour's dense and semi-dense directly connected neighbours
                # and add to set of microclusters connected to the parent
                connected |= neighbour.get_neighbours(
                    (self.dense_µclusters | self.semidense_µclusters) - seen,
                    rtree_index=self.rtree,
                    µcluster_map=self.µcluster_map)

        # Find all microclusters that were not grouped into a cluster
        unclustered = self.all_µclusters
        for cluster in clusters:
            unclustered -= cluster.µclusters

        return clusters, unclustered

    def local_density_step(
            self, time: Timestamp) -> tuple[list[Cluster], Set[MicroCluster]]:
        """Multi-density clustering stage; not implemented."""
        raise NotImplementedError("TODO")

    def density_step(
            self, time: Timestamp) -> tuple[list[Cluster], Set[MicroCluster]]:
        """Run the local or the global density stage, per ``context.multi_density``."""
        if self.context.multi_density:
            return self.local_density_step(time)
        else:
            return self.global_density_step(time)

    def step(
        self,
        element: Element,
        time: Timestamp,
        skip_density_step: bool = False
    ) -> tuple[MicroCluster, Optional[list[Cluster]],
               Optional[Set[MicroCluster]], Optional[Set[MicroCluster]]]:
        """Process one element.

        The distance stage always runs. The density partitions are updated
        once ``context.partitioning_interval`` steps have passed since the
        last update, and the density stage runs once
        ``context.density_interval`` steps have passed since its last run,
        unless ``skip_density_step`` is set.

        :return: ``(microcluster, clusters, unclustered, eliminated)``; the
            last three are ``None`` when the corresponding stage did not run
        """
        self.n_steps += 1

        µcluster = self.distance_step(element, time)

        if (self.n_steps >= self.last_partitioning_step +
                self.context.partitioning_interval):
            eliminated = self.update_density_partitions(time)

            self.last_partitioning_step = self.n_steps
        else:
            eliminated = None

        if (not skip_density_step and self.n_steps >=
                self.last_density_step + self.context.density_interval):
            clusters, unclustered = self.density_step(time)

            self.last_density_step = self.n_steps
        else:
            clusters = None
            unclustered = None

        return µcluster, clusters, unclustered, eliminated

    def run(self,
            elements: Iterable[Element],
            times: Optional[Iterable[Timestamp]] = None,
            progress: bool = True) -> list[Cluster]:
        """Feed all elements through ``step`` and cluster once at the end.

        :param elements: the elements to process
        :param times: one timestamp per element; defaults to 0, 1, 2, ...
        :param progress: wrap ``elements`` in ``tqdm`` when it is available
        :return: the clusters found at the last processed timestamp, or an
            empty list when no element was processed
        """
        if progress and tqdm is not None:
            elements = tqdm(elements)

        if times is None:
            times = count()

        processed = False
        last_time = None

        for element, timestamp in zip(elements, times):
            self.step(element, timestamp, skip_density_step=True)
            last_time = timestamp
            processed = True

        if not processed:
            # Nothing was consumed, so there is no timestamp to cluster at
            return []

        clusters, _ = self.density_step(last_time)

        return clusters
示例#21
0
class AdjacencyVersion(object):
  """Neighbor lookup over a collection of clusters.

  Cluster bounding boxes are indexed in an R-tree, and every attribute listed
  in ``feature_mapper.attrs`` gets a radius-based ``NearestNeighbors`` model
  fitted on the vectors returned by ``feature_mapper(cluster, attr)``.
  """

  def __init__(self, feature_mapper):
    """Create an empty index.

    feature_mapper -- callable used as ``feature_mapper(cluster, name)``; its
                      ``attrs`` and ``ranges`` attributes are read as well.
    """
    self.cid = 0
    self.disc_idxs = {}
    self.feature_mapper = feature_mapper
    self.radius = .15
    self.metric = 'hamming'

    self._rtree = None  # built lazily by setup_rtree()
    self._ndim = None
    self.clusters = []
    self.id2c = dict()
    self.c2id = dict()

  def to_json(self):
    """Serialize the cluster tables to a JSON string.

    Neither the R-tree nor the per-attribute neighbor models are included.
    A removed (falsy) cluster, or one with an empty ``__dict__``, is written
    as ``null``.
    """
    data = {
        'clusters': [(c.__dict__ or None) if c else None
                     for c in self.clusters],
        'id2c': [(key, c.__dict__) for key, c in self.id2c.items()],
        'c2id': [(c.__dict__, val) for c, val in self.c2id.items()],
        'cid': self.cid,
        '_ndim': self._ndim,
        '_rtreename': 'BLAH'
    }
    return json.dumps(data)

  def from_json(self, encoded):
    """Restore the cluster tables from a string produced by ``to_json``.

    The R-tree is reset to ``None``; the next ``setup_rtree`` call creates a
    new one.
    """
    data = json.loads(encoded)
    self.clusters = [(Cluster.from_dict(c) or None) if c else None
                     for c in data['clusters']]
    self.id2c = {key: Cluster.from_dict(val) for key, val in data['id2c']}
    self.c2id = {Cluster.from_dict(key): val for key, val in data['c2id']}
    self.cid = data['cid']
    self._ndim = data['_ndim']
    self._rtree = None

  def setup_rtree(self, ndim, clusters=None):
    """Return the R-tree, creating it on first use.

    ndim     -- number of bounding-box dimensions; when falsy a stand-in
                object is used whose ``intersection`` matches every cluster id.
    clusters -- optional clusters to bulk-load into a freshly created tree.
    """
    if self._rtree:
      return self._rtree

    self._ndim = ndim
    if not ndim:
      class _MatchAll(object):
        """Stand-in index for clusters that have no continuous dimensions."""
        def __init__(self, graph):
          self.graph = graph
        def insert(self, *args, **kwargs):
          pass
        def delete(self, *args, **kwargs):
          pass
        def intersection(self, *args, **kwargs):
          # range() exists on Python 2 and 3; xrange() only on Python 2.
          return range(len(self.graph.clusters))
      self._rtree = _MatchAll(self)
      return self._rtree

    p = RProp()
    # The tree is created with at least two dimensions; bbox_rtree() pads
    # one-dimensional boxes to match.
    p.dimension = max(2, ndim)
    p.dat_extension = 'data'
    p.idx_extension = 'index'

    if clusters:
      entries = ((i, self.bbox_rtree(c, enlarge=0.005), None)
                 for i, c in enumerate(clusters))
      self._rtree = RTree(entries, properties=p)
    else:
      self._rtree = RTree(properties=p)
    return self._rtree

  def bbox_rtree(self, cluster, enlarge=0.):
    """Return ``cluster.bbox`` flattened as ``lower + higher`` coordinates.

    enlarge -- fraction of each column's ``feature_mapper.ranges`` value by
               which the box is grown on both sides of that column.
    """
    lower, higher = map(list, cluster.bbox)
    if self._ndim == 1:
      # Pad with a dummy [0, 1] second dimension.
      lower.append(0)
      higher.append(1)

    if enlarge != 0:
      for idx, col in enumerate(cluster.cols):
        rng = enlarge * self.feature_mapper.ranges[col]
        lower[idx] -= rng
        higher[idx] += rng

    return lower + higher

  def insert_rtree(self, idx, cluster):
    """Insert ``cluster``'s bounding box under id ``idx``; return the cluster."""
    self.setup_rtree(len(cluster.bbox[0]))
    self._rtree.insert(idx, self.bbox_rtree(cluster))
    return cluster

  def remove_rtree(self, idx, cluster):
    """Delete ``cluster``'s bounding box stored under id ``idx``; return the cluster."""
    self.setup_rtree(len(cluster.bbox[0]))
    self._rtree.delete(idx, self.bbox_rtree(cluster))
    return cluster

  def search_rtree(self, cluster):
    """Return the ids whose boxes intersect ``cluster``'s slightly enlarged box.

    Ids, not cluster objects, are returned; ``neighbors`` maps them back to
    clusters. (Unreachable code that followed the return was removed.)
    """
    self.setup_rtree(len(cluster.bbox[0]))
    bbox = self.bbox_rtree(cluster, enlarge=0.01)
    return self._rtree.intersection(bbox)

  def bulk_init(self, clusters):
    """Load ``clusters`` into the R-tree and fit one neighbor model per attribute.

    Does nothing when ``clusters`` is empty.
    """
    if not clusters: return

    self.setup_rtree(len(clusters[0].bbox[0]), clusters)
    self.clusters = clusters
    for cid, c in enumerate(clusters):
      self.id2c[cid] = c
      self.c2id[c] = cid

    for dim in self.feature_mapper.attrs:
      Xs = [self.feature_mapper(c, dim) for c in clusters]
      idx = NearestNeighbors(
          radius=self.radius,
          algorithm='ball_tree',
          metric=self.metric
      )
      self.disc_idxs[dim] = idx
      idx.fit(np.array(Xs))

  def contains(self, cluster):
    """Return True when ``cluster`` is currently registered."""
    return cluster in self.c2id

  def remove(self, cluster):
    """Unregister ``cluster``; return True if it was present, else False.

    The slot in ``self.clusters`` is set to ``None`` so other ids stay valid.
    """
    if cluster not in self.c2id:
      return False
    cid = self.c2id[cluster]
    self.remove_rtree(cid, cluster)
    del self.c2id[cluster]
    del self.id2c[cid]
    self.clusters[cid] = None
    return True

  def neighbors(self, cluster):
    """Return the registered clusters that neighbor ``cluster``.

    A candidate must fall within ``self.radius`` in every attribute named by
    ``cluster.discretes`` and must intersect the cluster's enlarged bounding
    box. Removed clusters are left out. Returns a list.
    """
    ret = None
    # items()-free iteration: only the attribute names are needed, and this
    # form works on both Python 2 and 3 (iteritems() is Python-2-only).
    for name in cluster.discretes:
      if name not in self.disc_idxs:
        return []
      vect = self.feature_mapper(cluster, name)
      index = self.disc_idxs[name]
      _, idxs = index.radius_neighbors(vect, radius=self.radius)
      idxs = set(idxs[0].tolist())

      if ret is None:
        ret = idxs
      else:
        ret.intersection_update(idxs)
      if not ret: return []

    idxs = self.search_rtree(cluster)
    if ret is None:
      ret = set(idxs)
    else:
      ret.intersection_update(set(idxs))

    # Build a real list so the result is not a lazy filter object on Python 3.
    return [self.clusters[idx] for idx in ret if self.clusters[idx]]


  """
示例#22
0
def create_spatial_index(shape_dict):
    """Build a spatial index over the bounding boxes of named shapes.

    shape_dict -- mapping of name to a shape object exposing ``bounds``.

    Returns an ``Index`` in which each entry's id is the shape's position in
    the mapping's iteration order and its stored object is the shape's name.
    """
    spatial_index = Index()
    # items() iterates the same pairs on Python 2 and 3, whereas iteritems()
    # exists only on Python 2 dicts.
    for position, (name, shape) in enumerate(shape_dict.items()):
        spatial_index.insert(position, shape.bounds, obj=name)
    return spatial_index
示例#23
0
def create_spatial_index(shape_dict):
    """Build a spatial index over the bounding boxes of named shapes.

    shape_dict -- mapping of name to a shape object exposing ``bounds``.

    Returns an ``Index`` in which each entry's id is the shape's position in
    the mapping's iteration order and its stored object is the shape's name.
    """
    spatial_index = Index()
    # items() iterates the same pairs on Python 2 and 3, whereas iteritems()
    # exists only on Python 2 dicts.
    for position, (name, shape) in enumerate(shape_dict.items()):
        spatial_index.insert(position, shape.bounds, obj=name)
    return spatial_index
示例#24
0
def nearest_edges(G, X, Y, interpolate=None, return_dist=False):
    """
    Find the edge nearest to one point or to each of several points.

    Scalar `X` and `Y` yield a single nearest edge; iterable `X` and `Y` yield
    one nearest edge per point.

    With `interpolate=None`, each point is matched one at a time: an r-tree
    built from the edge geometries' bounds proposes candidates and the
    candidate with the smallest geometric distance to the point wins. Use a
    projected graph and projected points for accuracy. This is precise, and
    fastest when there are few points relative to the graph's size.

    With `interpolate` set, points are interpolated along every edge at that
    spacing and indexed: a k-d tree (requires scipy) with euclidean distance
    if the graph is projected, otherwise a ball tree (requires scikit-learn)
    with haversine distance. This is faster when there are many points
    relative to the graph's size.

    Parameters
    ----------
    G : networkx.MultiDiGraph
        graph in which to find nearest edges
    X : float or list
        points' x (longitude) coordinates, in same CRS/units as graph and
        containing no nulls
    Y : float or list
        points' y (latitude) coordinates, in same CRS/units as graph and
        containing no nulls
    interpolate : float
        spacing distance between interpolated points, in same units as graph.
        smaller values generate more points.
    return_dist : bool
        optionally also return distance between points and nearest edges

    Returns
    -------
    ne or (ne, dist) : tuple or list
        nearest edges as (u, v, key) or optionally a tuple where `dist`
        contains distances between the points and their nearest edges
    """
    # non-iterable coordinates are wrapped in arrays and unwrapped on return
    is_scalar = not (hasattr(X, "__iter__") and hasattr(Y, "__iter__"))
    if is_scalar:
        X = np.array([X])
        Y = np.array([Y])

    if np.isnan(X).any() or np.isnan(Y).any():  # pragma: no cover
        raise ValueError("`X` and `Y` cannot contain nulls")
    geoms = utils_graph.graph_to_gdfs(G, nodes=False)["geometry"]

    if interpolate is not None:

        # label every interpolated vertex with the (u, v, key) of its edge
        labeled_points = []
        for uvk, geom in zip(geoms.index, geoms.values):
            for xy in utils_geo.interpolate_points(geom, interpolate):
                labeled_points.append((uvk, xy))
        labels, xy = zip(*labeled_points)
        vertices = pd.DataFrame(xy, index=labels, columns=["x", "y"])

        if not projection.is_projected(G.graph["crs"]):
            # unprojected graph: ball tree with haversine distances
            if BallTree is None:  # pragma: no cover
                raise ImportError(
                    "scikit-learn must be installed to search an unprojected graph"
                )
            # haversine requires lat, lng coords in radians
            vertices_rad = np.deg2rad(vertices[["y", "x"]])
            points_rad = np.deg2rad(np.array([Y, X]).T)
            tree = BallTree(vertices_rad, metric="haversine")
            dist, pos = tree.query(points_rad, k=1)
            dist = dist[:, 0] * EARTH_RADIUS_M  # convert radians -> meters
            ne = vertices.index[pos[:, 0]]

        else:
            # projected graph: k-d tree with euclidean distances
            if cKDTree is None:  # pragma: no cover
                raise ImportError(
                    "scipy must be installed to search a projected graph")
            query_points = np.array([X, Y]).T
            dist, pos = cKDTree(vertices).query(query_points, k=1)
            ne = vertices.index[pos]

    else:

        # index edge bounds by position so r-tree hits can be used with iloc
        rtree = RTreeIndex()
        for pos, bounds in enumerate(geoms.bounds.values):
            rtree.insert(pos, bounds)

        # per point: r-tree candidates first, then the closest geometry wins
        matches = []
        for xy in zip(X, Y):
            candidates = list(rtree.nearest(xy))
            dists = geoms.iloc[candidates].distance(Point(xy))
            matches.append((dists.idxmin(), dists.min()))
        ne, dist = zip(*matches)

    # convert results to correct types for return
    ne = list(ne)
    dist = list(dist)
    if is_scalar:
        ne = ne[0]
        dist = dist[0]

    if not return_dist:
        return ne
    return ne, dist
示例#25
0
def main(input_dir, output_dir):
    """Split scraped venues into per-city GeoJSON files and planet-wide dumps.

    The city list is downloaded with ``wget`` into the temp directory and the
    city bounding boxes are loaded into an R-tree. Every venue yielded by
    ``gen_venues(input_dir)`` is then written:

    * to ``cities/<city>.geojson`` for each city whose box contains it,
    * to ``planet.geojson`` once per (name, street, lat, lon, domain),
    * to ``planet_addresses_only.json`` once per (name, street, domain).

    Finally, city files that stayed empty are deleted and ``manifest.json``
    lists the remaining ones. All output goes under ``output_dir``.
    """
    formatter = logging.Formatter(
        '%(asctime)s %(levelname)s [%(name)s]: %(message)s')
    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    city_names = []
    rtree = RTreeIndex()

    cities_filename = os.path.join(tempfile.gettempdir(), 'cities.json')

    subprocess.check_call([
        'wget',
        'https://raw.githubusercontent.com/mapzen/metroextractor-cities/master/cities.json',
        '-O', cities_filename
    ])

    with open(cities_filename) as cities_file:
        all_cities = json.load(cities_file)

    # R-tree ids are positions in city_names.
    for region in all_cities['regions'].values():
        for city, data in region['cities'].items():
            bbox = data['bbox']
            rtree.insert(len(city_names), (float(bbox['left']), float(
                bbox['bottom']), float(bbox['right']), float(bbox['top'])))
            city_names.append(city)

    files = {}
    planet = None
    planet_addresses_only = None
    try:
        for name in city_names:
            # A repeated city name must not reopen (and truncate) its file.
            if name not in files:
                files[name] = open(
                    os.path.join(output_dir, 'cities',
                                 '{}.geojson'.format(name)), 'w')

        planet = open(os.path.join(output_dir, 'planet.geojson'), 'w')
        planet_addresses_only = open(
            os.path.join(output_dir, 'planet_addresses_only.json'), 'w')

        i = 0
        seen = set()

        for url, canonical, venues in gen_venues(input_dir):
            domain = urlparse.urlsplit(url).netloc
            # str.strip('www.') removes any leading/trailing 'w' and '.'
            # characters (e.g. "www.wow.com" -> "ow.com"); drop only the
            # literal prefix instead.
            if domain.startswith('www.'):
                domain = domain[len('www.'):]
            for props in venues:
                lat = props.get('latitude')
                lon = props.get('longitude')
                props['canonical'] = canonical
                props['url'] = url
                # A missing name or street hashes as the empty string rather
                # than crashing the join.
                street = props.get('street_address') or u''
                name = props.get('name') or u''
                planet_hash = hashlib.md5(u'|'.join(
                    (name, street, str(lat), str(lon),
                     domain)).encode('utf-8')).digest()
                address_hash = hashlib.md5(u'|'.join(
                    (name, street, domain)).encode('utf-8')).digest()
                props['guid'] = props.get('guid', random_guid())
                venue = venue_to_geojson(props)
                if lat is not None and lon is not None:
                    try:
                        lat = float(lat)
                        lon = float(lon)
                    except (TypeError, ValueError):
                        lat = None
                        lon = None
                if (lat is not None and lon is not None
                        and planet_hash not in seen):
                    venue_line = json.dumps(venue) + '\n'
                    # Write to EVERY matching city; previously the write sat
                    # outside the loop, so only the last match received it.
                    matching = {city_names[c] for c in
                                rtree.intersection((lon, lat, lon, lat))}
                    for city in matching:
                        files[city].write(venue_line)
                    planet.write(venue_line)
                    seen.add(planet_hash)
                if address_hash not in seen:
                    planet_addresses_only.write(json.dumps(props) + '\n')
                    seen.add(address_hash)
                i += 1
                if i % 1000 == 0:
                    logger.info('did {}'.format(i))

        logger.info('Creating manifest files')

        manifest_files = []

        for region in all_cities['regions'].values():
            for city, data in region['cities'].items():
                f = files[city]
                if f.closed:
                    # Already handled as empty under the same city name.
                    continue
                if f.tell() == 0:
                    # Nothing was written for this city: remove the file.
                    f.close()
                    os.unlink(
                        os.path.join(output_dir, 'cities',
                                     '{}.geojson'.format(city)))
                    continue

                bbox = data['bbox']
                lat = midpoint(float(bbox['top']), float(bbox['bottom']))
                lon = midpoint(float(bbox['left']), float(bbox['right']))

                manifest_files.append({
                    'latitude':
                    lat,
                    'longitude':
                    lon,
                    'file':
                    '{}.geojson'.format(city),
                    'name':
                    city.replace('_', ', ').replace('-', ' ').title()
                })
    finally:
        # Flush and release every output handle, even after a failure.
        for city_file in files.values():
            city_file.close()
        for out in (planet, planet_addresses_only):
            if out is not None:
                out.close()

    manifest = {'files': manifest_files}

    with open(os.path.join(output_dir, 'manifest.json'), 'w') as manifest_file:
        json.dump(manifest, manifest_file)

    logger.info('Done!')
示例#26
0
}

# In[2]:

# Spatial index exposed by the land-use layer's ``sindex`` attribute.
osm_land_use_idx = osm_land_use.sindex
# osm_roads_idx = osm_roads.sindex
# osm_waterways_idx = osm_waterways.sindex
# osm_water_idx = osm_water.sindex
# osm_traffic_idx = osm_traffic.sindex
# osm_transport_idx = osm_transport.sindex
# acled_idx = acled.sindex

# Copy the land-use index into an ``Index`` created from a path-like name.
# NOTE(review): presumably this persists the index on disk under
# ./index/osm_land_use_idx -- confirm against the Index implementation.
idx = Index("./index/osm_land_use_idx")
print("here")
# Each item of the source index is unpacked as a pair and re-inserted as
# (id, bounds). NOTE(review): assumes iterating ``sindex`` yields such pairs.
for i, k in list(osm_land_use_idx):
    idx.insert(i, k)
idx.close()
print("Done with osm_land_use_idx")

# idx = Index("./index/osm_roads_idx")
# idx.insert(osm_roads_idx)
# idx.close()
# print("Done with osm_roads_idx")

# idx = Index("./index/osm_waterways_idx")
# idx.insert(osm_waterways_idx)
# idx.close()
# print("Done with osm_waterways_idx")

# idx = Index("./index/osm_water_idx")
# idx.insert(osm_water_idx)
示例#27
0
class AdjacencyGraph(object):
    """Undirected adjacency graph over clusters, backed by an R-tree.

    ``graph`` maps a cluster to the set of clusters adjacent to it. The
    R-tree over cluster bounding boxes is used to find adjacency candidates.
    """

    def __init__(self, clusters, partitions_complete=True):
        """Build the graph from ``clusters``.

        partitions_complete -- when false, ``neighbors`` ignores the graph
                               and returns every live cluster.
        """
        self.partitions_complete = partitions_complete
        self.graph = defaultdict(set)
        self.cid = 0
        self.clusters = []
        self.id2c = dict()
        self.c2id = dict()
        self._rtree = None  # built lazily by setup_rtree()
        self._ndim = None

        self.bulk_init(clusters)

    def to_json(self):
        """Serialize the cluster tables and the graph (not the R-tree) to JSON.

        A removed (falsy) cluster, or one with an empty ``__dict__``, is
        written as ``null``.
        """
        data = {
            'clusters': [(c.__dict__ or None) if c else None
                         for c in self.clusters],
            'id2c': [(key, c.__dict__) for key, c in self.id2c.items()],
            'c2id': [(c.__dict__, val) for c, val in self.c2id.items()],
            # Fixed: this used the non-existent ``itemsiter()`` and raised
            # AttributeError on every call.
            'graph': [(key.__dict__, [val.__dict__ for val in vals])
                      for key, vals in self.graph.items()],
            'cid': self.cid,
            '_ndim': self._ndim,
            '_rtreename': 'BLAH'
        }
        return json.dumps(data)

    def from_json(self, encoded):
        """Restore state from a string produced by ``to_json``.

        The graph is rebuilt as a ``defaultdict`` of sets, the same shape
        ``__init__`` creates, so ``insert`` and ``remove`` keep working after
        a round trip. The R-tree is reset and recreated on next use.
        """
        data = json.loads(encoded)
        self.clusters = [(Cluster.from_dict(c) or None) if c else None
                         for c in data['clusters']]
        self.id2c = {key: Cluster.from_dict(val)
                     for key, val in data['id2c']}
        self.c2id = {Cluster.from_dict(key): val
                     for key, val in data['c2id']}
        self.graph = defaultdict(set)
        for key, vals in data['graph']:
            self.graph[Cluster.from_dict(key)] = set(
                Cluster.from_dict(val) for val in vals)
        self.cid = data['cid']
        self._ndim = data['_ndim']
        self._rtree = None

    def setup_rtree(self, ndim, clusters=None):
        """Return the R-tree, creating it on first use.

        ndim     -- number of bounding-box dimensions; when falsy a stand-in
                    object is used whose ``intersection`` matches every id.
        clusters -- optional clusters to bulk-load into a new tree.
        """
        if self._rtree:
            return self._rtree

        self._ndim = ndim
        if not ndim:
            class _MatchAll(object):
                """Stand-in index for clusters without continuous dimensions."""
                def __init__(self, graph):
                    self.graph = graph
                def insert(self, *args, **kwargs):
                    pass
                def delete(self, *args, **kwargs):
                    pass
                def intersection(self, *args, **kwargs):
                    # range() exists on Python 2 and 3; xrange() does not.
                    return range(len(self.graph.clusters))
            self._rtree = _MatchAll(self)
            return self._rtree

        p = RProp()
        # The tree is created with at least two dimensions; bbox_rtree()
        # pads one-dimensional boxes to match.
        p.dimension = max(2, ndim)
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        if clusters:
            entries = ((i, self.bbox_rtree(c, enlarge=0.00001), None)
                       for i, c in enumerate(clusters))
            self._rtree = RTree(entries, properties=p)
        else:
            self._rtree = RTree(properties=p)
        return self._rtree

    def bbox_rtree(self, cluster, enlarge=0.):
        """Return ``cluster.bbox`` flattened as ``lower + higher`` coordinates.

        enlarge -- absolute amount by which the box grows on every side.
        """
        lower, higher = map(list, cluster.bbox)
        if self._ndim == 1:
            # Pad with a dummy [0, 1] second dimension.
            lower.append(0)
            higher.append(1)

        # Fixed: the guard compared against 1., which silently skipped an
        # enlargement of exactly 1; "no enlargement" is 0.
        if enlarge != 0:
            lower = [v - enlarge for v in lower]
            higher = [v + enlarge for v in higher]

        return lower + higher

    def insert_rtree(self, idx, cluster):
        """Insert ``cluster``'s bounding box under id ``idx``; return the cluster."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.insert(idx, self.bbox_rtree(cluster))
        return cluster

    def remove_rtree(self, idx, cluster):
        """Delete ``cluster``'s bounding box stored under id ``idx``; return the cluster."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.delete(idx, self.bbox_rtree(cluster))
        return cluster

    def search_rtree(self, cluster):
        """Return the live clusters whose boxes intersect ``cluster``'s box."""
        self.setup_rtree(len(cluster.bbox[0]))
        bbox = self.bbox_rtree(cluster, enlarge=0.00001)
        hits = (self.clusters[idx] for idx in self._rtree.intersection(bbox))
        # A real list, so the result is not a lazy filter object on Python 3.
        return [c for c in hits if c]

    def bulk_init(self, clusters):
        """Register ``clusters`` and connect every adjacent pair."""
        if clusters:
            self.setup_rtree(len(clusters[0].bbox[0]), clusters)

        self.clusters.extend(clusters)
        for cid, c in enumerate(clusters):
            self.id2c[cid] = c
            self.c2id[c] = cid

        for idx, c in enumerate(clusters):
            for n in self.search_rtree(c):
                # Visit each unordered pair once (and never pair c with itself).
                if self.c2id[n] <= idx: continue
                if c.discretes_contains(n) and box_completely_contained(c.bbox, n.bbox): continue
                if not c.adjacent(n, 0.8): continue
                self.graph[c].add(n)
                self.graph[n].add(c)

    def insert(self, cluster):
        """Add ``cluster`` and link it to every adjacent or overlapping node.

        Does nothing when the cluster is already a node of the graph.
        """
        if cluster in self.graph:
            return

        self.graph[cluster] = set()
        # Snapshot the keys: the loop mutates the sets stored in the graph.
        for o in list(self.graph):
            if cluster == o:
                continue
            if cluster.adjacent(o, 0.8) or (volume(intersection_box(cluster.bbox, o.bbox)) > 0 and not cluster.contains(o)):
                self.graph[cluster].add(o)
                self.graph[o].add(cluster)

        cid = len(self.clusters)
        self.clusters.append(cluster)
        self.id2c[cid] = cluster
        self.c2id[cluster] = cid
        self.insert_rtree(cid, cluster)

    def remove(self, cluster):
        """Remove ``cluster`` from the graph, the id tables and the R-tree.

        Does nothing when the cluster is not a node of the graph. Its slot in
        ``self.clusters`` is set to ``None`` so other ids stay valid.
        """
        if cluster not in self.graph:
            return

        for neigh in self.graph[cluster]:
            if not neigh == cluster:
                # discard() tolerates a one-sided edge; the former bare
                # ``except`` dropped into the debugger in that case.
                self.graph[neigh].discard(cluster)
        del self.graph[cluster]

        cid = self.c2id[cluster]
        self.remove_rtree(cid, cluster)
        del self.c2id[cluster]
        del self.id2c[cid]
        self.clusters[cid] = None

    def neighbors(self, cluster):
        """Return the clusters considered neighbors of ``cluster``.

        * partitions incomplete: every live cluster (a list);
        * cluster is a graph node: its adjacency set;
        * otherwise: the union of the adjacency sets of the intersecting,
          adjacent clusters whose box is not completely contained in
          ``cluster``'s box.
        """
        if not self.partitions_complete:
            return [c for c in self.clusters if c]

        if cluster in self.graph:
            return self.graph[cluster]

        ret = set()
        for key in self.search_rtree(cluster):
            if not cluster.adjacent(key):
                continue
            if box_completely_contained(key.bbox, cluster.bbox):
                continue
            ret.update(self.graph[key])
        return ret
示例#28
0
def isolation(
    X,
    coordinates,
    metric="euclidean",
    middle="mean",
    return_all=False,
    progressbar=False,
):
    """
    Compute the isolation of each value of X by constructing the distance
    to the nearest higher value in the data.

    Observations are visited from highest to lowest value. Each one is matched
    to the nearest observation already in a spatial index (all of which were
    visited earlier in the descending sort) and is then added to the index.

    Parameters
    ----------
    X : numpy.ndarray
        (N, p) array of data to use as input. If p > 1, the "elevation" is computed
        using the topo.to_elevation function.
    coordinates : numpy.ndarray
        (N,k) array of locations for X to compute distances. If metric='precomputed', this
        should contain the distances from each point to every other point, and k == N.
    metric : string or callable (default: 'euclidean')
        name of distance metric in scipy.spatial.distance, or function, that can be
        used to compute distances between locations. If 'precomputed', ad-hoc function
        will be defined to look up distances between points instead.
    middle : string or callable (default: 'mean')
        method to define the elevation of points. See to_elevation for more details.
    return_all : bool (default: False)
        if False, only return the isolation (distance to nearest higher value).
    progressbar: bool (default: False)
        if True, show a progressbar for the computation.

    Returns
    -------
    either (N,) array of isolation values, or a pandas dataframe containing the full
    tree of precedence for the isolation tree.

    Raises
    ------
    ImportError
        if rtree is not installed, or if a progressbar is requested and tqdm
        is not installed.
    """
    X = check_array(X, ensure_2d=False)
    X = to_elevation(X, middle=middle).squeeze()
    try:
        from rtree.index import Index as SpatialIndex
    except ImportError as err:
        raise ImportError(
            "rtree library must be installed to use the isolation measure"
        ) from err
    distance_func = _resolve_metric(X, coordinates, metric)
    sort_order = numpy.argsort(-X)
    tree = SpatialIndex()
    # Seed the index with the highest observation; nothing is higher than it,
    # so its parent fields stay NaN.
    ix = sort_order[0]
    tree.insert(0, tuple(coordinates[ix]), obj=X[ix])
    precedence_tree = [[ix, numpy.nan, 0, numpy.nan, numpy.nan, numpy.nan]]

    # Pick the iterator wrapper. The import below binds only ``pbar``: a bare
    # ``import tqdm`` here would make ``tqdm`` a local name for the whole
    # function and break the module-level lookup in the branch above it.
    if not progressbar:
        pbar = _passthrough
    elif HAS_TQDM:
        pbar = tqdm
    else:
        try:
            from tqdm import tqdm as pbar
        except ImportError as err:
            raise ImportError(
                "the tqdm module is required for progressbars"
            ) from err

    for iter_ix, ix in pbar(enumerate(sort_order[1:])):
        rank = iter_ix + 1
        value = X[ix]
        location = coordinates[
            ix,
        ]
        # nearest() yields every equidistant entry, so a tie produces more
        # than one result; take the first instead of unpacking exactly one.
        match = next(iter(tree.nearest(tuple(location), objects=True)))
        higher_rank = match.id
        higher_value = match.object
        higher_location = match.bbox[:2]
        higher_ix = sort_order[higher_rank]
        distance = distance_func(location, higher_location)
        gap = higher_value - value
        precedence_tree.append([ix, higher_ix, rank, higher_rank, distance, gap])
        tree.insert(rank, tuple(location), obj=value)

    precedence_tree = numpy.asarray(precedence_tree)
    # Rows were built in descending-value order; scatter them back so row i
    # describes observation i.
    out = numpy.empty_like(precedence_tree)
    out[sort_order] = precedence_tree
    result = pandas.DataFrame(
        out,
        columns=["index", "parent_index", "rank", "parent_rank", "isolation", "gap"],
    ).sort_values(["index", "parent_index"])
    if return_all:
        return result
    else:
        return result.isolation.values
示例#29
0
class RectIndex(object):
    """An R-tree that stores all tracks on a layer.

    Every recorded box is indexed by its bounds expanded by (dx, dy), while
    the unexpanded box and the two margins are kept as the entry's payload.
    """
    def __init__(self, resolution, basename=None, overwrite=False):
        # type: (float, str, bool) -> None
        """Create the index; a ``basename`` selects an Index built from that name."""
        self._cnt = 0
        self._res = resolution
        if basename is not None:
            props = Property(overwrite=overwrite)
            self._index = Index(basename, interleaved=True, properties=props)
        else:
            self._index = Index(interleaved=True)

    @property
    def bound_box(self):
        # type: () -> BBox
        """The index's overall bounds as a BBox with integer coordinates."""
        left, bottom, right, top = self._index.bounds
        return BBox(int(left),
                    int(bottom),
                    int(right),
                    int(top),
                    self._res,
                    unit_mode=True)

    def close(self):
        """Close the underlying index."""
        self._index.close()

    def record_box(self, box, dx, dy):
        # type: (BBox, int, int) -> None
        """Record the given BBox."""
        expanded = box.expand(dx=dx, dy=dy, unit_mode=True)
        expanded_bounds = expanded.get_bounds(unit_mode=True)
        # The payload keeps the original box plus the margins it was stored with.
        payload = (box.left_unit, box.bottom_unit, box.right_unit,
                   box.top_unit, dx, dy)
        self._index.insert(self._cnt, expanded_bounds, obj=payload)
        self._cnt += 1

    def rect_iter(self):
        # type: () -> Generator[Tuple[BBox, int, int], None, None]
        """Yield every recorded box together with its (dx, dy) margins."""
        everything = self._index.intersection(self._index.bounds,
                                              objects='raw')
        for xl, yb, xr, yt, sdx, sdy in everything:
            yield BBox(xl, yb, xr, yt, self._res, unit_mode=True), sdx, sdy

    def intersection_iter(self, box, dx=0, dy=0):
        # type: (BBox, int, int) -> Generator[BBox, None, None]
        """Finds all bounding box that intersects the given box."""
        res = self._res
        test_box = box.expand(dx=dx, dy=dy, unit_mode=True)
        query_bounds = test_box.get_bounds(unit_mode=True)
        for xl, yb, xr, yt, sdx, sdy in self._index.intersection(
                query_bounds, objects='raw'):
            box_real = BBox(xl, yb, xr, yt, res, unit_mode=True)
            box_sp = box_real.expand(dx=sdx, dy=sdy, unit_mode=True)
            # Keep a hit only if either box, grown by its own margins,
            # overlaps the other one.
            if not (box_sp.overlaps(box) or test_box.overlaps(box_real)):
                continue
            yield box_real.expand(dx=max(dx, sdx),
                                  dy=max(dy, sdy),
                                  unit_mode=True)

    def intersection_rect_iter(self, box):
        # type: (BBox) -> Generator[BBox, None, None]
        """Finds all bounding box that intersects the given box."""
        res = self._res
        query_bounds = box.get_bounds(unit_mode=True)
        for xl, yb, xr, yt, _sdx, _sdy in self._index.intersection(
                query_bounds, objects='raw'):
            yield BBox(xl, yb, xr, yt, res, unit_mode=True)
示例#30
0
    polygons = []
    shapefile_records = []
    for shape_file in shape_files:
        print 'Getting polygons from: {}'.format(shape_file)
        with fiona.open('shape files/' + shape_file) as collection:
            for shapefile_record in collection:
                shape = asShape(shapefile_record['geometry'])
                y1, x1, y2, x2 = shape.bounds
                shapefile_record['properties']['bounds'] = str((x1, y1, x2, y2))
                shapefile_records.append(shapefile_record)
                polygons.append(shape)

    index = Index()
    count = 0
    for polygon in polygons:
        index.insert(count, polygon.bounds)
        count += 1

    # recursively loop over every directory
    for root, directories, filenames in os.walk('root'):
        for filename in filenames:
            obj = None
            with open(os.path.join(root, filename), 'r') as f:
                bb = f.readline()
                tpl = eval(bb)
                r = Rect(*tpl)
                # point = Point(*r.centre_point)
                records = []
                # for j in index.nearest(r.rtree_bb(), 1):
                for j in index.intersection(r.rtree_bb()):
                    shapefile = shapefile_records[j]
示例#31
0
class AdjacencyVersion(object):
    """Neighbor lookup over a collection of clusters.

    Cluster bounding boxes are indexed in an R-tree, and every attribute
    listed in ``feature_mapper.attrs`` gets a radius-based
    ``NearestNeighbors`` model fitted on the vectors returned by
    ``feature_mapper(cluster, attr)``.
    """

    def __init__(self, feature_mapper):
        """Create an empty index.

        feature_mapper -- callable used as ``feature_mapper(cluster, name)``;
                          its ``attrs`` and ``ranges`` attributes are read
                          as well.
        """
        self.cid = 0
        self.disc_idxs = {}
        self.feature_mapper = feature_mapper
        self.radius = .15
        self.metric = 'hamming'

        self._rtree = None  # built lazily by setup_rtree()
        self._ndim = None
        self.clusters = []
        self.id2c = dict()
        self.c2id = dict()

    def to_json(self):
        """Serialize the cluster tables to a JSON string.

        Neither the R-tree nor the per-attribute neighbor models are
        included. A removed (falsy) cluster, or one with an empty
        ``__dict__``, is written as ``null``.
        """
        data = {
            'clusters': [(c.__dict__ or None) if c else None
                         for c in self.clusters],
            'id2c': [(key, c.__dict__) for key, c in self.id2c.items()],
            'c2id': [(c.__dict__, val) for c, val in self.c2id.items()],
            'cid': self.cid,
            '_ndim': self._ndim,
            '_rtreename': 'BLAH'
        }
        return json.dumps(data)

    def from_json(self, encoded):
        """Restore the cluster tables from a string produced by ``to_json``.

        The R-tree is reset to ``None``; the next ``setup_rtree`` call
        creates a new one.
        """
        data = json.loads(encoded)
        self.clusters = [(Cluster.from_dict(c) or None) if c else None
                         for c in data['clusters']]
        self.id2c = {key: Cluster.from_dict(val)
                     for key, val in data['id2c']}
        self.c2id = {Cluster.from_dict(key): val
                     for key, val in data['c2id']}
        self.cid = data['cid']
        self._ndim = data['_ndim']
        self._rtree = None

    def setup_rtree(self, ndim, clusters=None):
        """Return the R-tree, creating it on first use.

        ndim     -- number of bounding-box dimensions; when falsy a stand-in
                    object is used whose ``intersection`` matches every id.
        clusters -- optional clusters to bulk-load into a new tree.
        """
        if self._rtree:
            return self._rtree

        self._ndim = ndim
        if not ndim:

            class _MatchAll(object):
                """Stand-in index for clusters without continuous dimensions."""

                def __init__(self, graph):
                    self.graph = graph

                def insert(self, *args, **kwargs):
                    pass

                def delete(self, *args, **kwargs):
                    pass

                def intersection(self, *args, **kwargs):
                    # range() exists on Python 2 and 3; xrange() does not.
                    return range(len(self.graph.clusters))

            self._rtree = _MatchAll(self)
            return self._rtree

        p = RProp()
        # The tree is created with at least two dimensions; bbox_rtree()
        # pads one-dimensional boxes to match.
        p.dimension = max(2, ndim)
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        if clusters:
            entries = ((i, self.bbox_rtree(c, enlarge=0.005), None)
                       for i, c in enumerate(clusters))
            self._rtree = RTree(entries, properties=p)
        else:
            self._rtree = RTree(properties=p)
        return self._rtree

    def bbox_rtree(self, cluster, enlarge=0.):
        """Return ``cluster.bbox`` flattened as ``lower + higher`` coordinates.

        enlarge -- fraction of each column's ``feature_mapper.ranges`` value
                   by which the box is grown on both sides of that column.
        """
        lower, higher = map(list, cluster.bbox)
        if self._ndim == 1:
            # Pad with a dummy [0, 1] second dimension.
            lower.append(0)
            higher.append(1)

        if enlarge != 0:
            for idx, col in enumerate(cluster.cols):
                rng = enlarge * self.feature_mapper.ranges[col]
                lower[idx] -= rng
                higher[idx] += rng

        return lower + higher

    def insert_rtree(self, idx, cluster):
        """Insert ``cluster``'s bounding box under id ``idx``; return the cluster."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.insert(idx, self.bbox_rtree(cluster))
        return cluster

    def remove_rtree(self, idx, cluster):
        """Delete ``cluster``'s bounding box stored under id ``idx``; return the cluster."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.delete(idx, self.bbox_rtree(cluster))
        return cluster

    def search_rtree(self, cluster):
        """Return the ids whose boxes intersect ``cluster``'s enlarged box.

        Ids, not cluster objects, are returned; ``neighbors`` maps them back
        to clusters. (Unreachable code that followed the return was removed.)
        """
        self.setup_rtree(len(cluster.bbox[0]))
        bbox = self.bbox_rtree(cluster, enlarge=0.01)
        return self._rtree.intersection(bbox)

    def bulk_init(self, clusters):
        """Load ``clusters`` into the R-tree and fit one model per attribute.

        Does nothing when ``clusters`` is empty.
        """
        if not clusters: return

        self.setup_rtree(len(clusters[0].bbox[0]), clusters)
        self.clusters = clusters
        for cid, c in enumerate(clusters):
            self.id2c[cid] = c
            self.c2id[c] = cid

        for dim in self.feature_mapper.attrs:
            Xs = [self.feature_mapper(c, dim) for c in clusters]
            idx = NearestNeighbors(radius=self.radius,
                                   algorithm='ball_tree',
                                   metric=self.metric)
            self.disc_idxs[dim] = idx
            idx.fit(np.array(Xs))

    def contains(self, cluster):
        """Return True when ``cluster`` is currently registered."""
        return cluster in self.c2id

    def remove(self, cluster):
        """Unregister ``cluster``; return True if it was present, else False.

        The slot in ``self.clusters`` is set to ``None`` so other ids stay
        valid.
        """
        if cluster not in self.c2id:
            return False
        cid = self.c2id[cluster]
        self.remove_rtree(cid, cluster)
        del self.c2id[cluster]
        del self.id2c[cid]
        self.clusters[cid] = None
        return True

    def neighbors(self, cluster):
        """Return the registered clusters that neighbor ``cluster``.

        A candidate must fall within ``self.radius`` in every attribute named
        by ``cluster.discretes`` and must intersect the cluster's enlarged
        bounding box. Removed clusters are left out. Returns a list.
        """
        ret = None
        # Only the attribute names are needed, and plain iteration works on
        # both Python 2 and 3 (iteritems() is Python-2-only).
        for name in cluster.discretes:
            if name not in self.disc_idxs:
                return []
            vect = self.feature_mapper(cluster, name)
            index = self.disc_idxs[name]
            _, idxs = index.radius_neighbors(vect, radius=self.radius)
            idxs = set(idxs[0].tolist())

            if ret is None:
                ret = idxs
            else:
                ret.intersection_update(idxs)
            if not ret: return []

        idxs = self.search_rtree(cluster)
        if ret is None:
            ret = set(idxs)
        else:
            ret.intersection_update(set(idxs))

        # A real list, so the result is not a lazy filter object on Python 3.
        return [self.clusters[idx] for idx in ret if self.clusters[idx]]

    """
示例#32
0
def main(input_dir, output_dir):
    """Export scraped venues as per-city GeoJSON, planet dumps and a manifest.

    Fetches the metro-extract ``cities.json`` with ``wget``, indexes every
    city bounding box in an R-tree, then walks ``gen_venues(input_dir)``.
    Each venue with usable coordinates that has not been seen before is
    written to ``planet.geojson`` and to the file of every city whose
    bounding box contains it.  Each previously unseen name/street/domain
    combination is written to ``planet_addresses_only.json``.  City files
    that stay empty are deleted; the others are listed in ``manifest.json``.

    :param input_dir: directory handed to ``gen_venues``.
    :param output_dir: directory that receives the output files.  The
        ``cities`` subdirectory is not created here, so it has to exist.
    """
    formatter = logging.Formatter('%(asctime)s %(levelname)s [%(name)s]: %(message)s')
    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    city_names = []
    rtree = RTreeIndex()

    cities_filename = os.path.join(tempfile.gettempdir(), 'cities.json')

    subprocess.check_call(['wget', 'https://raw.githubusercontent.com/mapzen/metroextractor-cities/master/cities.json', '-O', cities_filename])

    with open(cities_filename) as cities_file:
        all_cities = json.load(cities_file)

    # R-tree ids are positions in city_names.
    for region in all_cities['regions'].values():
        for city, data in region['cities'].items():
            bbox = data['bbox']
            rtree.insert(len(city_names), (float(bbox['left']), float(bbox['bottom']), float(bbox['right']), float(bbox['top'])))
            city_names.append(city)

    cities_dir = os.path.join(output_dir, 'cities')
    files = {}
    planet = None
    planet_addresses_only = None
    manifest_files = []

    try:
        for name in city_names:
            # A repeated city name shares one handle instead of leaking one.
            if name not in files:
                files[name] = open(os.path.join(cities_dir, '{}.geojson'.format(name)), 'w')

        planet = open(os.path.join(output_dir, 'planet.geojson'), 'w')
        planet_addresses_only = open(os.path.join(output_dir, 'planet_addresses_only.json'), 'w')

        i = 0
        seen = set()

        for url, canonical, venues in gen_venues(input_dir):
            # str.strip('www.') removes any leading/trailing 'w' and '.'
            # characters; only the literal prefix must go.
            netloc = urlparse.urlsplit(url).netloc
            domain = netloc[len('www.'):] if netloc.startswith('www.') else netloc
            for props in venues:
                lat = props.get('latitude')
                lon = props.get('longitude')
                props['canonical'] = canonical
                props['url'] = url
                street = props.get('street_address')
                name = props.get('name')
                # Missing name/street must not break the join below.
                name_key = name or u''
                street_key = street or u''
                planet_hash = hashlib.md5(u'|'.join((name_key, street_key, str(lat), str(lon), domain)).encode('utf-8')).digest()
                address_hash = hashlib.md5(u'|'.join((name_key, street_key, domain)).encode('utf-8')).digest()
                props['guid'] = props.get('guid', random_guid())
                venue = venue_to_geojson(props)
                if lat is not None and lon is not None:
                    try:
                        lat = float(lat)
                        lon = float(lon)
                    except (TypeError, ValueError, OverflowError):
                        lat = None
                        lon = None
                if lat is not None and lon is not None and planet_hash not in seen:
                    line = json.dumps(venue) + '\n'
                    # Write to every matching city, once per distinct file.
                    matched = {city_names[c] for c in rtree.intersection((lon, lat, lon, lat))}
                    for city in matched:
                        files[city].write(line)
                    planet.write(line)
                    seen.add(planet_hash)
                if address_hash not in seen:
                    planet_addresses_only.write(json.dumps(props) + '\n')
                    seen.add(address_hash)
                i += 1
                if i % 1000 == 0:
                    logger.info('did {}'.format(i))

        logger.info('Creating manifest files')

        for region in all_cities['regions'].values():
            for city, data in region['cities'].items():
                f = files[city]
                if f.closed:
                    # Repeated city name whose empty file was already removed.
                    continue
                if f.tell() == 0:
                    f.close()
                    os.unlink(os.path.join(cities_dir, '{}.geojson'.format(city)))
                    continue

                bbox = data['bbox']
                lat = midpoint(float(bbox['top']), float(bbox['bottom']))
                lon = midpoint(float(bbox['left']), float(bbox['right']))

                manifest_files.append({'latitude': lat, 'longitude': lon, 'file': '{}.geojson'.format(city), 'name': city.replace('_', ', ').replace('-', ' ').title()})
    finally:
        # Closing flushes buffered output; close() on a closed file is a no-op.
        for f in files.values():
            f.close()
        for f in (planet, planet_addresses_only):
            if f is not None:
                f.close()

    manifest = {'files': manifest_files}

    with open(os.path.join(output_dir, 'manifest.json'), 'w') as manifest_file:
        json.dump(manifest, manifest_file)

    logger.info('Done!')
示例#33
0
class SpatialIndex():
    """

    A spatial index is a type of extended index that allows you to index a
    spatial column. A spatial column is a table column that contains data of a
    spatial data type.

    Spatial indexes help to improve spatial query performance on a dataframe.
    Identifying a feature, selecting features, and joining data all have better
    performance when using spatial indexing.


    ====================     ==================================================
    Argument                 Description
    --------------------     --------------------------------------------------
    stype                    Required String. This sets the type of spatial
                             index being used by the user. The current types of
                             spatial indexes are: custom, rtree and quadtree.
    --------------------     --------------------------------------------------
    bbox                     Optional Tuple. The extent of the spatial data as:
                             (xmin, ymin, xmax, ymax). This parameter is required
                             if a QuadTree Spatial Index is being used.

                             Example:
                             bbox=(-100, -50, 100, 50)
    --------------------     --------------------------------------------------
    filename                 Optional String. The name of the spatial index
                             file. This is only supported by rtree spatial
                             indexes. For large datasets an rtree index can be
                             saved to disk and used at a later time. If this is
                             not provided the r-tree index will be in-memory.
    --------------------     --------------------------------------------------
    custom_index             Optional Object. Sometimes QuadTree and Rtree
                             indexing is not enough. A custom spatial index class
                             can be given to the SpatialIndex class and used
                             using encapsulation.  The custom index must have two
                             methods: `intersect` that accepts a tuple, and
                             `insert` which must accept an oid and a bounding
                             box. This object is required when `stype` of
                             'custom' is specified.
    ====================     ==================================================


    """
    _stype = None
    _bbox = None
    _index = None
    _df = None
    _filename = None

    #----------------------------------------------------------------------
    def __init__(self, stype, bbox=None, **kwargs):
        """
        Creates the backing index selected by `stype`.

        :stype: string - 'custom', 'rtree' or 'quadtree' (case-insensitive)
        :bbox: tuple - (xmin,ymin,xmax,ymax), required for 'quadtree'
        :custom_index: object - required for 'custom'
        :filename: string - optional, passed to the rtree index

        :raises: ValueError when no index can be created for the arguments
        """
        ci = kwargs.pop('custom_index', None)
        self._filename = kwargs.pop('filename', None)
        self._bbox = bbox
        self._stype = stype.lower()
        self._df = None
        # Compare against None: an empty custom index that defines __len__
        # is falsy but still a valid index.
        if self._stype == 'custom' and ci is not None:
            self._index = ci
        elif self._stype == 'quadtree' and bbox:
            self._index = QIndex(bbox=bbox)
        elif self._stype == 'rtree' and RIndex:
            self._index = RIndex(self._filename)
        else:
            raise ValueError("Could not create the spatial index.")

    #----------------------------------------------------------------------
    def intersect(self, bbox):
        """
        Returns the spatial features that intersect the bbox

        :bbox: tuple - (xmin,ymin,xmax,ymax)

        :returns: list
        """
        if self._stype == 'rtree':
            return list(self._index.intersection(bbox))
        elif self._stype == 'quadtree':
            return list(self._index.intersect(bbox=bbox))
        else:
            return list(self._index.intersect(bbox))

    #----------------------------------------------------------------------
    def insert(self, oid, bbox):
        """
        Inserts the entry into the spatial index

        :oid: unique id
        :bbox: tuple - (xmin,ymin,xmax,ymax)

        :returns: whatever the backing index's insert returns
        """
        if self._index is None:
            raise Exception(("Could not insert into a spatial index because "
                             "it does not exist."))
        if self._stype == 'rtree' and \
           HASRTREE and \
           isinstance(self._index, RIndex):

            r = self._index.insert(id=oid, coordinates=bbox, obj=None)
            self.flush()
            return r
        elif self._stype == 'quadtree':
            return self._index.insert(item=oid, bbox=bbox)
        elif self._stype == 'custom':
            # A custom index is documented to expose insert(oid, bbox);
            # calling intersect here never stored the entry.
            r = self._index.insert(oid, bbox)
            self.flush()
            return r

    #----------------------------------------------------------------------
    def flush(self):
        """
        Saves the index to disk if a filename is given for an R-Tree Spatial Index.

        If the backing index has its own `flush` method it is called.
        Otherwise an rtree index with a filename is closed and reopened.

        :returns: Boolean - False when nothing could be flushed

        """
        if hasattr(self._index, 'flush'):
            self._index.flush()
        elif self._stype == 'rtree' and \
             self._filename:
            self._index.close()
            self._index = RIndex(self._filename)
        else:
            return False
        return True
示例#34
0
class RTreeTest(unittest.TestCase):
    def Xtest_insertion(self):
        """Print the mean time to insert 100 boxes, first with an empty
        index as setup and then with an index preloaded with 100000 boxes.

        The ``X`` prefix keeps unittest's default ``test*`` discovery from
        picking this benchmark up.
        """
        runs = 10
        batch = 100

        fresh = [Box(k) for k in range(batch)]
        empty_timer = timeit.Timer(
            lambda: self.insert_boxes(fresh),
            setup=lambda: self.create_index_data([]),
        )
        print(empty_timer.timeit(number=runs) / runs)

        preload = 100000
        existing = [Box(k) for k in range(preload)]
        extra = [Box(k) for k in range(preload, preload + batch)]
        loaded_timer = timeit.Timer(
            lambda: self.insert_boxes(extra),
            setup=lambda: self.create_index_data(existing),
        )
        print(loaded_timer.timeit(number=runs) / runs)

    def Xtest_creation(self):
        """Print index build times for 100 boxes and for six doublings of
        that size, each with its ratio to the 100-box baseline.
        """
        runs = 10
        base = 100

        sample = [Box(k) for k in range(base)]
        baseline = timeit.Timer(
            lambda: self.create_index_data(sample)).timeit(number=runs) / runs
        print(base, baseline)

        for exponent in range(1, 7):
            factor = 2**exponent
            size = factor * base
            sample = [Box(k) for k in range(size)]
            timer = timeit.Timer(lambda: self.create_index_data(sample))
            elapsed = timer.timeit(number=runs) / runs
            print(size, factor, elapsed, elapsed / baseline)

    def Xtest_stream(self):
        """Print the mean build time for 10000 boxes, once inserting them
        one by one and once passing a generator to the index constructor.
        """
        runs = 10
        count = 10000
        sample = [Box(k) for k in range(count)]

        one_by_one = timeit.Timer(lambda: self.create_index_data(sample))
        print(one_by_one.timeit(number=runs) / runs)

        # A fresh generator is built for every timed call.
        streamed = timeit.Timer(
            lambda: self.create_index_stream(
                (item.index, item.box, item.index) for item in sample))
        print(streamed.timeit(number=runs) / runs)

    def Xtest_query(self):
        """Print the mean time to query 10 randomly sampled boxes against an
        index of 100 boxes and then against an index of 100000 boxes.
        """
        runs = 10
        for size in (100, 100000):
            population = [Box(k) for k in range(size)]
            self.create_index_data(population)
            probes = random.sample(population, 10)

            timer = timeit.Timer(lambda: self.query_index(probes))
            print(timer.timeit(number=runs) / runs)

    def insert_boxes(self, boxes):
        """Insert each box into ``self.idx`` under its own ``index`` id."""
        for entry in boxes:
            box_id, extent = entry.index, entry.box
            self.idx.insert(box_id, extent)

    def create_index_data(self, data):
        """Replace ``self.idx`` with a new index and insert every item of
        ``data`` one at a time, passing the item's ``index`` as both id and
        third argument.
        """
        fresh = Index()
        self.idx = fresh
        for entry in data:
            fresh.insert(entry.index, entry.box, entry.index)

    def create_index_stream(self, generator):
        """Replace ``self.idx`` with an index built from ``generator``."""
        streamed = Index(generator)
        self.idx = streamed

    def query_index(self, boxes):
        for b in boxes:
            overlapping_boxes = self.idx.intersection(b.box)