示例#1
0
class PolygonIndex(object):
    include_only_properties = None
    simplify_tolerance = 0.0001
    preserve_topology = True
    persistent_polygons = False
    cache_size = 0
    fix_invalid_polygons = False

    INDEX_FILENAME = None
    POLYGONS_DB_DIR = 'polygons'

    def __init__(self, index=None, polygons=None, polygons_db=None, save_dir=None,
                 index_filename=None,
                 polygons_db_path=None,
                 include_only_properties=None):
        if save_dir:
            self.save_dir = save_dir
        else:
            self.save_dir = None

        if not index_filename:
            index_filename = self.INDEX_FILENAME

        self.index_path = os.path.join(save_dir or '.', index_filename)

        if not index:
            self.create_index(overwrite=True)
        else:
            self.index = index

        if include_only_properties and hasattr(include_only_properties, '__contains__'):
            self.include_only_properties = include_only_properties

        if not polygons and not self.persistent_polygons:
            self.polygons = {}
        elif polygons and not self.persistent_polygons:
            self.polygons = polygons
        elif self.persistent_polygons and self.cache_size > 0:
            self.polygons = LRU(self.cache_size)
            if polygons:
                for key, value in six.iteritems(polygons):
                    self.polygons[key] = value

            self.cache_hits = 0
            self.cache_misses = 0

            self.get_polygon = self.get_polygon_cached

        if not polygons_db_path:
            polygons_db_path = os.path.join(save_dir or '.', self.POLYGONS_DB_DIR)

        if not polygons_db:
            self.polygons_db = LevelDB(polygons_db_path)
        else:
            self.polygons_db = polygons_db

        self.setup()

        self.i = 0

    def create_index(self, overwrite=False):
        raise NotImplementedError('Children must implement')

    def index_polygon(self, polygon):
        raise NotImplementedError('Children must implement')

    def setup(self):
        pass

    def clear_cache(self, garbage_collect=True):
        if self.persistent_polygons and self.cache_size > 0:
            self.polygons.clear()
            if garbage_collect:
                gc.collect()

    def simplify_polygon(self, poly, simplify_tolerance=None, preserve_topology=None):
        if simplify_tolerance is None:
            simplify_tolerance = self.simplify_tolerance
        if preserve_topology is None:
            preserve_topology = self.preserve_topology
        return poly.simplify(simplify_tolerance, preserve_topology=preserve_topology)

    def index_polygon_properties(self, properties):
        pass

    def polygon_geojson(self, poly, properties):
        return {
            'type': 'Feature',
            'geometry': mapping(poly),
        }

    def add_polygon(self, poly, properties, cache=False, include_only_properties=None):
        if include_only_properties is not None:
            properties = {k: v for k, v in properties.iteritems() if k in include_only_properties}

        if not self.persistent_polygons or cache:
            self.polygons[self.i] = prep(poly)

        if self.persistent_polygons:
            self.polygons_db.Put(self.polygon_key(self.i), json.dumps(self.polygon_geojson(poly, properties)))

        self.polygons_db.Put(self.properties_key(self.i), json.dumps(properties))
        self.index_polygon_properties(properties)
        self.i += 1

    @classmethod
    def create_from_shapefiles(cls, inputs, output_dir,
                               index_filename=None,
                               include_only_properties=None):
        index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME)
        for input_file in inputs:
            if include_only_properties is not None:
                include_props = include_only_properties.get(input_file, cls.include_only_properties)
            else:
                include_props = cls.include_only_properties

            f = fiona.open(input_file)

            index.add_geojson_like_file(f)

        return index

    @classmethod
    def fix_polygon(cls, poly):
        '''
        Coerce to valid polygon
        '''
        if not poly.is_valid:
            poly = poly.buffer(0)
            if not poly.is_valid:
                return None
        return poly

    @classmethod
    def to_polygon(cls, coords, holes=None, test_point=None):
        '''
        Create shapely polygon from list of coordinate tuples if valid
        '''
        if not coords or len(coords) < 3:
            return None

        # Fix for polygons crossing the 180th meridian
        lons = [lon for lon, lat in coords]
        if (max(lons) - min(lons) > 180):
            coords = [(lon + 360.0 if lon < 0 else lon, lat) for lon, lat in coords]
            if holes:
                holes = [(lon + 360.0 if lon < 0 else lon, lat) for lon, lat in holes]

        poly = Polygon(coords, holes)
        try:
            if test_point is None:
                test_point = poly.representative_point()
            invalid = cls.fix_invalid_polygons and not poly.is_valid and not poly.contains(test_point)
        except Exception:
            invalid = True

        if invalid:
            try:
                poly_fix = cls.fix_polygon(poly)

                if poly_fix is not None and poly_fix.bounds and len(poly_fix.bounds) == 4 and poly_fix.is_valid and poly_fix.type == poly.type:
                    if test_point is None:
                        test_point = poly_fix.representative_point()

                    if poly_fix.contains(test_point):
                        poly = poly_fix
            except Exception:
                pass

        return poly

    def add_geojson_like_record(self, rec, include_only_properties=None):
        if not rec or not rec.get('geometry') or 'type' not in rec['geometry']:
            return
        poly_type = rec['geometry']['type']
        if poly_type == 'Polygon':
            coords = rec['geometry']['coordinates'][0]
            poly = self.to_polygon(coords)
            if poly is None or not poly.bounds or len(poly.bounds) != 4:
                return
            self.index_polygon(poly)
            self.add_polygon(poly, rec['properties'], include_only_properties=include_only_properties)
        elif poly_type == 'MultiPolygon':
            polys = []
            poly_coords = rec['geometry']['coordinates']
            for coords in poly_coords:
                poly = self.to_polygon(coords[0])
                if poly is None or not poly.bounds or len(poly.bounds) != 4:
                    continue
                polys.append(poly)
                self.index_polygon(poly)

            self.add_polygon(MultiPolygon(polys), rec['properties'], include_only_properties=include_only_properties)
        else:
            return

    def add_geojson_like_file(self, f, include_only_properties=None):
        '''
        Add either GeoJSON or a shapefile record to the index
        '''

        for rec in f:
            self.add_geojson_like_record(rec, include_only_properties=include_only_properties)

    @classmethod
    def create_from_geojson_files(cls, inputs, output_dir,
                                  index_filename=None,
                                  polys_filename=DEFAULT_POLYS_FILENAME,
                                  include_only_properties=None):
        index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME)
        for input_file in inputs:
            if include_only_properties is not None:
                include_props = include_only_properties.get(input_file, cls.include_only_properties)
            else:
                include_props = cls.include_only_properties

            f = json.load(open(input_file))

            index.add_geojson_like_file(f['features'], include_only_properties=include_props)

        return index

    def compact_polygons_db(self):
        self.polygons_db.CompactRange('\x00', '\xff')

    def save(self):
        self.save_index()
        self.save_properties(os.path.join(self.save_dir, DEFAULT_PROPS_FILENAME))
        if not self.persistent_polygons:
            self.save_polygons(os.path.join(self.save_dir, DEFAULT_POLYS_FILENAME))
        self.compact_polygons_db()
        self.save_polygon_properties(self.save_dir)

    def load_properties(self, filename):
        properties = json.load(open(filename))
        self.i = int(properties.get('num_polygons', self.i))

    def save_properties(self, out_filename):
        out = open(out_filename, 'w')
        json.dump({'num_polygons': str(self.i)}, out)

    def save_polygons(self, out_filename):
        out = open(out_filename, 'w')
        for i in xrange(self.i):
            poly = self.polygons[i]
            feature = {
                'type': 'Feature',
                'geometry': mapping(poly.context),
            }
            out.write(json.dumps(feature) + u'\n')

    def save_index(self):
        raise NotImplementedError('Children must implement')

    def load_polygon_properties(self, d):
        pass

    def save_polygon_properties(self, d):
        pass

    @classmethod
    def polygon_from_geojson(cls, feature):
        poly_type = feature['geometry']['type']
        if poly_type == 'Polygon':
            coords = feature['geometry']['coordinates']
            poly = cls.to_polygon(coords[0], holes=coords[1:] or None)
            return poly
        elif poly_type == 'MultiPolygon':
            polys = []
            for coords in feature['geometry']['coordinates']:
                poly = cls.to_polygon(coords[0], holes=coords[1:] or None)
                polys.append(poly)

            return MultiPolygon(polys)

    @classmethod
    def load_polygons(cls, filename):
        f = open(filename)
        polygons = {}
        cls.i = 0
        for line in f:
            feature = json.loads(line.rstrip())
            polygons[cls.i] = prep(cls.polygon_from_geojson(feature))
            cls.i += 1
        return polygons

    @classmethod
    def load_index(cls, d, index_name=None):
        raise NotImplementedError('Children must implement')

    @classmethod
    def load(cls, d, index_name=None, polys_filename=DEFAULT_POLYS_FILENAME,
             properties_filename=DEFAULT_PROPS_FILENAME,
             polys_db_dir=POLYGONS_DB_DIR):
        index = cls.load_index(d, index_name=index_name or cls.INDEX_FILENAME)
        if not cls.persistent_polygons:
            polys = cls.load_polygons(os.path.join(d, polys_filename))
        else:
            polys = None
        polygons_db = LevelDB(os.path.join(d, polys_db_dir))
        polygon_index = cls(index=index, polygons=polys, polygons_db=polygons_db, save_dir=d)
        polygon_index.load_properties(os.path.join(d, properties_filename))
        polygon_index.load_polygon_properties(d)
        return polygon_index

    def get_candidate_polygons(self, lat, lon):
        raise NotImplementedError('Children must implement')

    def get_properties(self, i):
        return json.loads(self.polygons_db.Get(self.properties_key(i)))

    def get_polygon(self, i):
        return self.polygons[i]

    def get_polygon_cached(self, i):
        poly = self.polygons.get(i, None)
        if poly is None:
            data = json.loads(self.polygons_db.Get(self.polygon_key(i)))
            poly = prep(self.polygon_from_geojson(data))
            self.polygons[i] = poly
            self.cache_misses += 1
        else:
            self.cache_hits += 1
        return poly

    def __iter__(self):
        for i in xrange(self.i):
            yield self.get_properties(i), self.get_polygon(i)

    def __len__(self):
        return self.i

    def polygons_contain(self, candidates, point, return_all=False):
        containing = None
        if return_all:
            containing = []
        for i in candidates:
            poly = self.get_polygon(i)
            contains = poly.contains(point)
            if contains:
                properties = self.get_properties(i)
                if not return_all:
                    return properties
                else:
                    containing.append(properties)
        return containing

    def polygon_key(self, i):
        return 'poly:{}'.format(i)

    def properties_key(self, i):
        return 'props:{}'.format(i)

    def point_in_poly(self, lat, lon, return_all=False):
        candidates = self.get_candidate_polygons(lat, lon)
        point = Point(lon, lat)
        return self.polygons_contain(candidates, point, return_all=return_all)
示例#2
0
class PointIndex(object):
    include_only_properties = None
    persistent_index = False
    cache_size = 0

    POINTS_DB_DIR = 'points'

    GEOHASH_PRECISION = 7
    PROPS_FILENAME = 'properties.json'
    POINTS_FILENAME = 'points.json'
    INDEX_FILENAME = 'index.json'

    def __init__(self,
                 index=None,
                 save_dir=None,
                 points=None,
                 points_path=None,
                 points_db=None,
                 points_db_path=None,
                 index_path=None,
                 include_only_properties=None,
                 precision=GEOHASH_PRECISION):
        if save_dir:
            self.save_dir = save_dir
        else:
            self.save_dir = None

        if include_only_properties and hasattr(include_only_properties,
                                               '__contains__'):
            self.include_only_properties = include_only_properties

        if not index_path:
            index_path = os.path.join(save_dir or '.', self.INDEX_FILENAME)

        self.index_path = index_path

        if not index:
            self.index = defaultdict(list)
        else:
            self.index = index

        if not points_path:
            points_path = os.path.join(save_dir or '.', self.POINTS_FILENAME)
        self.points_path = points_path

        if not points:
            self.points = array.array('d')
        else:
            self.points = points

        if not points_db_path:
            points_db_path = os.path.join(save_dir or '.', self.POINTS_DB_DIR)

        if not points_db:
            self.points_db = LevelDB(points_db_path)
        else:
            self.points_db = points_db

        self.precision = precision

        self.i = 0

    def index_point(self, lat, lon):
        code = geohash.encode(lat, lon)[:self.precision]

        for key in [code] + geohash.neighbors(code):
            self.index[key].append(self.i)
        self.points.extend([lat, lon])

    def add_point(self,
                  lat,
                  lon,
                  properties,
                  cache=False,
                  include_only_properties=None):
        if include_only_properties is None and self.include_only_properties:
            include_only_properties = self.include_only_properties
        if include_only_properties is not None:
            properties = {
                k: v
                for k, v in properties.iteritems()
                if k in include_only_properties
            }

        self.index_point(lat, lon)
        self.points_db.Put(self.properties_key(self.i), json.dumps(properties))
        self.i += 1

    def load_properties(self, filename):
        properties = json.load(open(filename))
        self.i = int(properties.get('num_points', self.i))
        self.precision = int(properties.get('precision', self.precision))

    def save_properties(self, out_filename):
        out = open(out_filename, 'w')
        json.dump({
            'num_points': str(self.i),
            'precision': self.precision
        }, out)

    def save_index(self):
        if not self.index_path:
            self.index_path = os.path.join(self.save_dir or '.',
                                           self.INDEX_FILENAME)
        json.dump(self.index, open(self.index_path, 'w'))

    @classmethod
    def load_index(cls, d, index_name=None):
        return json.load(
            open(os.path.join(d, index_name or cls.INDEX_FILENAME)))

    def save_points(self):
        json.dump(self.points, open(self.points_path, 'w'))

    @classmethod
    def load_points(cls, d):
        return array.array(
            'd', json.load(open(os.path.join(d, cls.POINTS_FILENAME))))

    def properties_key(self, i):
        return 'props:{}'.format(i)

    def get_properties(self, i):
        return json.loads(self.points_db.Get(self.properties_key(i)))

    def compact_points_db(self):
        self.points_db.CompactRange('\x00', '\xff')

    def save(self):
        self.save_index()
        self.save_points()
        self.compact_points_db()
        self.save_properties(os.path.join(self.save_dir, self.PROPS_FILENAME))

    @classmethod
    def load(cls, d):
        index = cls.load_index(d)
        points = cls.load_points(d)
        points_db = LevelDB(os.path.join(d, cls.POINTS_DB_DIR))
        point_index = cls(index=index, points=points, points_db=points_db)
        point_index.load_properties(os.path.join(d, cls.PROPS_FILENAME))
        return point_index

    def __iter__(self):
        for i in xrange(self.i):
            lat, lon = self.points[i * 2], self.points[i * 2 + 1]
            yield self.get_properties(i), lat, lon

    def __len__(self):
        return self.i

    def get_candidate_points(self, latitude, longitude):
        code = geohash.encode(latitude, longitude)[:self.precision]
        candidates = OrderedDict()

        candidates.update([(k, None) for k in self.index.get(code, [])])

        for neighbor in geohash.neighbors(code):
            candidates.update([(k, None)
                               for k in self.index.get(neighbor, [])])

        return candidates.keys()

    def point_distances(self, latitude, longitude):
        candidates = self.get_candidate_points(latitude, longitude)

        return [(i, self.points[i * 2], self.points[i * 2 + 1],
                 haversine_distance(latitude, longitude, self.points[i * 2],
                                    self.points[i * 2 + 1]))
                for i in candidates]

    def all_nearby_points(self, latitude, longitude):
        distances = self.point_distances(latitude, longitude)
        if not distances:
            return []
        return sorted(distances, key=operator.itemgetter(-1))

    def points_with_properties(self, results):
        return [(self.get_properties(i), lat, lon, distance)
                for i, lat, lon, distance in results]

    def nearest_points(self, latitude, longitude):
        return self.points_with_properties(
            self.all_nearby_points(latitude, longitude))

    def nearest_n_points(self, latitude, longitude, n=2):
        return self.points_with_properties(
            self.all_nearby_points(latitude, longitude)[:n])

    def nearest_point(self, latitude, longitude):
        distances = self.all_nearby_points(latitude, longitude)
        if not distances:
            return None
        return self.points_with_properties(distances[:1])[0]