def loads(string):
    """
    Construct a GeoJSON `dict` from WKB (`string`).

    :param string:
        Iterable of WKB bytes. It is wrapped with ``iter()`` and consumed
        incrementally, so an already partially-consumed iterator may be
        passed in (nested geometries are parsed this way).
    :returns:
        Whatever the importer registered for the geometry type returns
        (a GeoJSON `dict`).
    :raises ValueError:
        If the leading byte matches neither ``BIG_ENDIAN`` nor
        ``LITTLE_ENDIAN``.
    """
    string = iter(string)

    # endianness = string[0:1]
    endianness = as_bin_str(take(1, string))
    if endianness == BIG_ENDIAN:
        big_endian = True
    elif endianness == LITTLE_ENDIAN:
        big_endian = False
    else:
        # hexlify() needs a bytes-like object. Only encode when we were
        # handed text; calling .encode() on bytes would raise AttributeError
        # on Python 3 and mask the ValueError we actually want to raise.
        raw = endianness
        if not isinstance(raw, (bytes, bytearray)):
            raw = raw.encode()
        raise ValueError("Invalid endian byte: '0x%s'. Expected 0x00 or 0x01"
                         % binascii.hexlify(raw).decode())

    # type_bytes = string[1:5]
    type_bytes = as_bin_str(take(4, string))
    if not big_endian:
        # To identify the type, order the type bytes in big endian:
        type_bytes = type_bytes[::-1]

    geom_type = _BINARY_TO_GEOM_TYPE.get(type_bytes)

    importer = _loads_registry.get(geom_type)
    if importer is None:
        _unsupported_geom_type(geom_type)

    # Everything left in the iterator is the geometry payload. The importer
    # consumes only what it needs, leaving the rest for the caller.
    return importer(big_endian, type_bytes, string)
def _load_linestring(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a LineString to a GeoJSON `dict`.

    :param bool big_endian:
        If `True`, interpret ``data_bytes`` in big endian order, else little
        endian.
    :param type_bytes:
        Geometry type bytes, compared against the values of ``WKB_2D``,
        ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to pick the number of dimensions.
    :param data_bytes:
        Iterator over the remaining bytes: a 4-byte vertex count followed by
        the vertices, each a run of 8-byte doubles.
    :returns:
        ``{'type': 'LineString', 'coordinates': [...]}``. The coordinate list
        is empty when the vertex count is zero.
    """
    endian_token = '>' if big_endian else '<'

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    [num_verts] = struct.unpack('%sl' % endian_token,
                                as_bin_str(take(4, data_bytes)))

    vert_fmt = endian_token + 'd' * num_dims
    coords = []
    # Loop exactly `num_verts` times so that an empty LineString (count of
    # zero) reads no vertex data at all.
    for _ in range(num_verts):
        vert_wkb = as_bin_str(take(8 * num_dims, data_bytes))
        vert = list(struct.unpack(vert_fmt, vert_wkb))
        if is_m:
            # XYM input: insert a 0.0 at index 2 so the vertex has 4 values.
            vert.insert(2, 0.0)
        coords.append(vert)

    return dict(type='LineString', coordinates=coords)
def _load_multilinestring(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a MultiLineString to a GeoJSON `dict`.

    :param bool big_endian:
        If `True`, interpret ``data_bytes`` in big endian order, else little
        endian.
    :param type_bytes:
        Geometry type bytes, compared against the values of ``WKB_2D``,
        ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to pick the number of dimensions.
    :param data_bytes:
        Iterable over the remaining bytes: a 4-byte LineString count followed
        by that many LineStrings, each with its own endian byte, type bytes,
        vertex count and vertices.
    :returns:
        ``{'type': 'MultiLineString', 'coordinates': [...]}``. The coordinate
        list is empty when the LineString count is zero.
    """
    endian_token = '>' if big_endian else '<'
    data_bytes = iter(data_bytes)

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    if is_m:
        dim = 'M'
    else:
        dim = _INT_TO_DIM_LABEL[num_dims]

    [num_ls] = struct.unpack('%sl' % endian_token,
                             as_bin_str(take(4, data_bytes)))

    coords = []
    # Loop exactly `num_ls` times so that an empty MultiLineString (count of
    # zero) does not try to read a member header.
    for _ in range(num_ls):
        ls_endian = as_bin_str(take(1, data_bytes))
        ls_type = as_bin_str(take(4, data_bytes))

        # Each member must use the same byte order and dimensionality as
        # the enclosing geometry.
        # NOTE: these are `assert`s, so they are skipped under `python -O`.
        if big_endian:
            assert ls_endian == BIG_ENDIAN
            assert ls_type == _WKB[dim]['LineString']
        else:
            assert ls_endian == LITTLE_ENDIAN
            assert ls_type[::-1] == _WKB[dim]['LineString']

        [num_verts] = struct.unpack('%sl' % endian_token,
                                    as_bin_str(take(4, data_bytes)))

        # Unpack every double of this LineString in one call, then regroup
        # them into per-vertex blocks.
        num_values = num_dims * num_verts
        values = struct.unpack(endian_token + 'd' * num_values,
                               as_bin_str(take(8 * num_values, data_bytes)))
        values = list(block_splitter(values, num_dims))
        if is_m:
            # XYM input: insert a 0.0 at index 2 of every vertex.
            for v in values:
                v.insert(2, 0.0)
        coords.append(values)

    return dict(type='MultiLineString', coordinates=coords)
def _load_geometrycollection(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a GeometryCollection to a GeoJSON `dict`.

    :param bool big_endian:
        If `True`, interpret the geometry count in big endian order, else
        little endian. Each member geometry is parsed by `loads`, which reads
        that member's own endian byte.
    :param type_bytes:
        Geometry type bytes, compared against the values of ``WKB_2D``,
        ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to pick the expected
        dimensionality of the members.
    :param data_bytes:
        Iterator over the remaining bytes: a 4-byte geometry count followed
        by that many complete WKB geometries.
    :returns:
        ``{'type': 'GeometryCollection', 'geometries': [...]}``. The geometry
        list is empty when the count is zero.
    """
    endian_token = '>' if big_endian else '<'

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    [num_geoms] = struct.unpack('%sl' % endian_token,
                                as_bin_str(take(4, data_bytes)))

    # Members of an M-typed collection are checked against 4 dimensions
    # rather than the 3 values actually stored per vertex.
    expected_dims = 4 if is_m else num_dims

    geometries = []
    # Loop exactly `num_geoms` times so that an empty collection (count of
    # zero) does not try to parse a member from whatever bytes follow.
    for _ in range(num_geoms):
        # `loads` consumes one full geometry from the shared iterator.
        geometry = loads(data_bytes)
        _check_dimensionality(geometry, expected_dims)
        # TODO(LB): Add type assertions for the geometry; collections should
        # not mix 2d, 3d, 4d, etc.
        geometries.append(geometry)

    return dict(type='GeometryCollection', geometries=geometries)
def _load_multipoint(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a MultiPoint to a GeoJSON `dict`.

    :param bool big_endian:
        If `True`, interpret ``data_bytes`` in big endian order, else little
        endian.
    :param type_bytes:
        Geometry type bytes, compared against the values of ``WKB_2D``,
        ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to pick the number of dimensions.
    :param data_bytes:
        Iterable over the remaining bytes: a 4-byte point count followed by
        that many Points, each with its own endian byte, type bytes and
        coordinate doubles.
    :returns:
        ``{'type': 'MultiPoint', 'coordinates': [...]}``. The coordinate list
        is empty when the point count is zero.
    """
    endian_token = '>' if big_endian else '<'
    data_bytes = iter(data_bytes)

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    if is_m:
        dim = 'M'
    else:
        dim = _INT_TO_DIM_LABEL[num_dims]

    [num_points] = struct.unpack('%sl' % endian_token,
                                 as_bin_str(take(4, data_bytes)))

    point_fmt = '%s%s' % (endian_token, 'd' * num_dims)
    coords = []
    # Loop exactly `num_points` times so that an empty MultiPoint (count of
    # zero) does not try to read a member header.
    for _ in range(num_points):
        point_endian = as_bin_str(take(1, data_bytes))
        point_type = as_bin_str(take(4, data_bytes))
        values = list(struct.unpack(
            point_fmt, as_bin_str(take(8 * num_dims, data_bytes))))
        if is_m:
            # XYM input: insert a 0.0 at index 2 so the point has 4 values.
            values.insert(2, 0.0)

        # Each member must use the same byte order and dimensionality as
        # the enclosing geometry.
        # NOTE: these are `assert`s, so they are skipped under `python -O`.
        if big_endian:
            assert point_endian == BIG_ENDIAN
            assert point_type == _WKB[dim]['Point']
        else:
            assert point_endian == LITTLE_ENDIAN
            assert point_type[::-1] == _WKB[dim]['Point']

        coords.append(values)

    return dict(type='MultiPoint', coordinates=coords)
def _load_polygon(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a Polygon to a GeoJSON `dict`.

    :param bool big_endian:
        If `True`, interpret ``data_bytes`` in big endian order, else little
        endian.
    :param type_bytes:
        Geometry type bytes, compared against the values of ``WKB_2D``,
        ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to pick the number of dimensions.
    :param data_bytes:
        Iterable over the remaining bytes: a 4-byte ring count followed by
        that many rings, each a 4-byte vertex count plus its vertices.
    :returns:
        ``{'type': 'Polygon', 'coordinates': [...]}``. The coordinate list is
        empty when the ring count is zero.
    """
    endian_token = '>' if big_endian else '<'
    data_bytes = iter(data_bytes)

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    [num_rings] = struct.unpack('%sl' % endian_token,
                                as_bin_str(take(4, data_bytes)))

    coords = []
    # Loop exactly `num_rings` times so that an empty Polygon (ring count of
    # zero) does not try to read a ring header.
    for _ in range(num_rings):
        ring = []
        [num_verts] = struct.unpack('%sl' % endian_token,
                                    as_bin_str(take(4, data_bytes)))

        # Read the whole ring at once, then cut it into 8-byte chunks
        # (one chunk per double).
        verts_wkb = as_bin_str(take(8 * num_verts * num_dims, data_bytes))
        verts = block_splitter(verts_wkb, 8)
        # Re-join each chunk into a binary string that struct can unpack;
        # how the chunk's items are joined depends on the Python major
        # version.
        if six.PY2:
            verts = (b''.join(x) for x in verts)
        elif six.PY3:
            verts = (b''.join(bytes([y]) for y in x) for x in verts)

        # Group the doubles `num_dims` at a time: one group per vertex.
        for vert_wkb in block_splitter(verts, num_dims):
            values = [struct.unpack('%sd' % endian_token, x)[0]
                      for x in vert_wkb]
            if is_m:
                # XYM input: insert a 0.0 at index 2 of the vertex.
                values.insert(2, 0.0)
            ring.append(values)
        coords.append(ring)

    return dict(type='Polygon', coordinates=coords)
def _load_polygon(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a Polygon to a GeoJSON `dict`.

    :param bool big_endian:
        If `True`, interpret ``data_bytes`` in big endian order, else little
        endian.
    :param type_bytes:
        Geometry type bytes, compared against the values of ``WKB_2D``,
        ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to pick the number of dimensions.
    :param data_bytes:
        Iterable over the remaining bytes: a 4-byte ring count followed by
        that many rings, each a 4-byte vertex count plus its vertices (runs
        of 8-byte doubles).
    :returns:
        ``{'type': 'Polygon', 'coordinates': [...]}``: a list of rings, each
        a list of vertices. The list is empty when the ring count is zero.
    """
    endian_token = '>' if big_endian else '<'
    data_bytes = iter(data_bytes)

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    count_fmt = '%sl' % endian_token
    [num_rings] = struct.unpack(count_fmt, as_bin_str(take(4, data_bytes)))

    vert_fmt = endian_token + 'd' * num_dims
    vert_size = 8 * num_dims

    coords = []
    # Loop exactly `num_rings` times so that an empty Polygon (ring count of
    # zero) does not try to read a ring header.
    for _ in range(num_rings):
        [num_verts] = struct.unpack(count_fmt,
                                    as_bin_str(take(4, data_bytes)))
        ring = []
        for _ in range(num_verts):
            # Decode one vertex at a time straight from the byte stream.
            vert = list(struct.unpack(
                vert_fmt, as_bin_str(take(vert_size, data_bytes))))
            if is_m:
                # XYM input: insert a 0.0 at index 2 of the vertex.
                vert.insert(2, 0.0)
            ring.append(vert)
        coords.append(ring)

    return dict(type='Polygon', coordinates=coords)
def _load_point(big_endian, type_bytes, data_bytes):
    """
    Build a GeoJSON Point `dict` from the coordinate bytes of a WKB Point.

    :param bool big_endian:
        `True` to decode the doubles as big endian, `False` for little
        endian.
    :param str type_bytes:
        4-byte geometry type (as a binary string) identifying both the
        geometry (Point) and its dimensions (2D, Z, M or ZM). These bytes
        are expected to be in big endian order regardless of ``big_endian``.
    :param str data_bytes:
        Source of the coordinate data; 8 bytes are taken per coordinate.
    :returns:
        GeoJSON `dict` representing the Point geometry.
    """
    byte_order = '>' if big_endian else '<'

    def read_doubles(count):
        # Pull `count` doubles (8 bytes each) off the front of the data.
        raw = as_bin_str(take(8 * count, data_bytes))
        return struct.unpack(byte_order + 'd' * count, raw)

    if type_bytes == WKB_2D['Point']:
        coords = read_doubles(2)
    elif type_bytes == WKB_Z['Point']:
        coords = read_doubles(3)
    elif type_bytes == WKB_M['Point']:
        # NOTE: XYM geometries are rare. To avoid ambiguity they are emitted
        # as XYZM in the GeoJSON, with a default Z value of `0.0`.
        coords = list(read_doubles(3))
        coords.insert(2, 0.0)
    elif type_bytes == WKB_ZM['Point']:
        coords = read_doubles(4)

    return dict(type='Point', coordinates=list(coords))
def _load_point(big_endian, type_bytes, data_bytes):
    """
    Decode the payload of a WKB Point into a GeoJSON `dict`.

    :param bool big_endian:
        Byte order of the coordinate doubles: big endian when `True`,
        little endian otherwise.
    :param str type_bytes:
        The 4-byte type code (binary string) giving the geometry type
        (Point) and dimensionality (2D, Z, M or ZM). It is expected in big
        endian order no matter what ``big_endian`` says.
    :param str data_bytes:
        Binary coordinate data to read from.
    :returns:
        ``{'type': 'Point', 'coordinates': [...]}``
    """
    if big_endian:
        endian_token = '>'
    else:
        endian_token = '<'

    if type_bytes == WKB_2D['Point']:
        raw = as_bin_str(take(16, data_bytes))
        coords = struct.unpack(endian_token + 'dd', raw)
    elif type_bytes == WKB_Z['Point']:
        raw = as_bin_str(take(24, data_bytes))
        coords = struct.unpack(endian_token + 'ddd', raw)
    elif type_bytes == WKB_M['Point']:
        # NOTE: XYM is seldom used. Rather than leave the third value
        # ambiguous, XYM points are reported as XYZM with Z set to `0.0`.
        raw = as_bin_str(take(24, data_bytes))
        x, y, m = struct.unpack(endian_token + 'ddd', raw)
        coords = [x, y, 0.0, m]
    elif type_bytes == WKB_ZM['Point']:
        raw = as_bin_str(take(32, data_bytes))
        coords = struct.unpack(endian_token + 'dddd', raw)

    return dict(type='Point', coordinates=list(coords))
def _load_multipolygon(big_endian, type_bytes, data_bytes):
    """
    Convert byte data for a MultiPolygon to a GeoJSON `dict`.

    :param bool big_endian:
        If `True`, interpret ``data_bytes`` in big endian order, else little
        endian.
    :param type_bytes:
        Geometry type bytes, compared against the values of ``WKB_2D``,
        ``WKB_Z``, ``WKB_M`` and ``WKB_ZM`` to pick the number of dimensions.
    :param data_bytes:
        Iterator over the remaining bytes: a 4-byte polygon count followed by
        that many Polygons, each with its own endian byte, type bytes, ring
        count and rings.
    :returns:
        ``{'type': 'MultiPolygon', 'coordinates': [...]}``. The coordinate
        list is empty when the polygon count is zero.
    """
    endian_token = '>' if big_endian else '<'

    is_m = False
    if type_bytes in WKB_2D.values():
        num_dims = 2
    elif type_bytes in WKB_Z.values():
        num_dims = 3
    elif type_bytes in WKB_M.values():
        num_dims = 3
        is_m = True
    elif type_bytes in WKB_ZM.values():
        num_dims = 4

    if is_m:
        dim = 'M'
    else:
        dim = _INT_TO_DIM_LABEL[num_dims]

    count_fmt = '%sl' % endian_token
    [num_polys] = struct.unpack(count_fmt, as_bin_str(take(4, data_bytes)))

    vert_fmt = endian_token + 'd' * num_dims

    coords = []
    # Loop exactly `num_polys` times so that an empty MultiPolygon (count of
    # zero) does not try to read a member header.
    for _ in range(num_polys):
        polygon = []
        poly_endian = as_bin_str(take(1, data_bytes))
        poly_type = as_bin_str(take(4, data_bytes))

        # Each member must use the same byte order and dimensionality as
        # the enclosing geometry.
        # NOTE: these are `assert`s, so they are skipped under `python -O`.
        if big_endian:
            assert poly_endian == BIG_ENDIAN
            assert poly_type == _WKB[dim]['Polygon']
        else:
            assert poly_endian == LITTLE_ENDIAN
            assert poly_type[::-1] == _WKB[dim]['Polygon']

        [num_rings] = struct.unpack(count_fmt,
                                    as_bin_str(take(4, data_bytes)))

        for _ in range(num_rings):
            ring = []
            [num_verts] = struct.unpack(count_fmt,
                                        as_bin_str(take(4, data_bytes)))

            for _ in range(num_verts):
                vert_wkb = as_bin_str(take(8 * num_dims, data_bytes))
                vert = list(struct.unpack(vert_fmt, vert_wkb))
                if is_m:
                    # XYM input: insert a 0.0 at index 2 of the vertex.
                    vert.insert(2, 0.0)
                ring.append(vert)

            polygon.append(ring)

        coords.append(polygon)

    return dict(type='MultiPolygon', coordinates=coords)
def loads(string):
    """
    Construct a GeoJSON `dict` from WKB (`string`).

    When the input carries an SRID, the resulting GeoJSON `dict` will include
    the SRID as an integer in the `meta` object. This was an arbitrary
    decision made by `geomet`, the discussion of which took place here:
    https://github.com/geomet/geomet/issues/28.

    In order to be consistent with other libraries [1] and (deprecated)
    specifications [2], also include the same information in a `crs`
    object. This isn't ideal, but the `crs` member is no longer part of the
    GeoJSON standard, according to RFC7946 [3]. However, it's still useful to
    include this information in GeoJSON payloads because it supports
    conversion to EWKT/EWKB (which are canonical formats used by PostGIS and
    the like).

    Example:

        {'type': 'Point',
         'coordinates': [0.0, 1.0],
         'meta': {'srid': 4326},
         'crs': {'type': 'name', 'properties': {'name': 'EPSG4326'}}}

    NOTE(larsbutler): I'm not sure if it's valid to just prefix EPSG
    (European Petroleum Survey Group) to an SRID like this, but we'll stick
    with it for now until it becomes a problem.

    NOTE(larsbutler): Ideally, we should use URNs instead of this notation,
    according to the new GeoJSON spec [4]. However, in order to be consistent
    with [1], we'll stick with this approach for now.

    References:

    [1] - https://github.com/bryanjos/geo/issues/76
    [2] - http://geojson.org/geojson-spec.html#coordinate-reference-system-objects
    [3] - https://tools.ietf.org/html/rfc7946#appendix-B.1
    [4] - https://tools.ietf.org/html/rfc7946#section-4

    :param string:
        Iterable of WKB/EWKB bytes. It is wrapped with ``iter()`` and
        consumed incrementally, so an already partially-consumed iterator
        may be passed in (nested geometries are parsed this way).
    :returns:
        The GeoJSON `dict` produced by the importer registered for the
        geometry type, plus `meta` and `crs` members if an SRID was present.
    :raises ValueError:
        If the leading byte matches neither ``BIG_ENDIAN`` nor
        ``LITTLE_ENDIAN``.
    """  # noqa
    string = iter(string)

    # endianness = string[0:1]
    endianness = as_bin_str(take(1, string))
    if endianness == BIG_ENDIAN:
        big_endian = True
    elif endianness == LITTLE_ENDIAN:
        big_endian = False
    else:
        # hexlify() needs a bytes-like object. Only encode when we were
        # handed text; calling .encode() on bytes would raise AttributeError
        # on Python 3 and mask the ValueError we actually want to raise.
        raw = endianness
        if not isinstance(raw, (bytes, bytearray)):
            raw = raw.encode()
        raise ValueError("Invalid endian byte: '0x%s'. Expected 0x00 or 0x01"
                         % binascii.hexlify(raw).decode())

    endian_token = '>' if big_endian else '<'

    # type_bytes = string[1:5]
    type_bytes = as_bin_str(take(4, string))
    if not big_endian:
        # To identify the type, order the type bytes in big endian:
        type_bytes = type_bytes[::-1]

    geom_type, type_bytes, has_srid = _get_geom_type(type_bytes)

    srid = None
    if has_srid:
        # The SRID is a 4-byte integer sitting between the type bytes and
        # the geometry payload.
        srid_field = as_bin_str(take(4, string))
        [srid] = struct.unpack('%si' % endian_token, srid_field)

    importer = _loads_registry.get(geom_type)
    if importer is None:
        _unsupported_geom_type(geom_type)

    # Everything left in the iterator is the geometry payload. The importer
    # consumes only what it needs, leaving the rest for the caller.
    result = importer(big_endian, type_bytes, string)

    if has_srid:
        # As mentioned in the docstring above, include both approaches to
        # indicating the SRID.
        result['meta'] = {'srid': int(srid)}
        result['crs'] = {
            'type': 'name',
            'properties': {'name': 'EPSG%s' % srid},
        }
    return result
def loads(string):
    """
    Construct a GeoJSON `dict` from WKB (`string`).

    When the input carries an SRID, the resulting GeoJSON `dict` will include
    the SRID as an integer in the `meta` object. This was an arbitrary
    decision made by `geomet`, the discussion of which took place here:
    https://github.com/geomet/geomet/issues/28.

    In order to be consistent with other libraries [1] and (deprecated)
    specifications [2], also include the same information in a `crs`
    object. This isn't ideal, but the `crs` member is no longer part of the
    GeoJSON standard, according to RFC7946 [3]. However, it's still useful to
    include this information in GeoJSON payloads because it supports
    conversion to EWKT/EWKB (which are canonical formats used by PostGIS and
    the like).

    Example:

        {'type': 'Point',
         'coordinates': [0.0, 1.0],
         'meta': {'srid': 4326},
         'crs': {'type': 'name', 'properties': {'name': 'EPSG4326'}}}

    NOTE(larsbutler): I'm not sure if it's valid to just prefix EPSG
    (European Petroleum Survey Group) to an SRID like this, but we'll stick
    with it for now until it becomes a problem.

    NOTE(larsbutler): Ideally, we should use URNs instead of this notation,
    according to the new GeoJSON spec [4]. However, in order to be consistent
    with [1], we'll stick with this approach for now.

    References:

    [1] - https://github.com/bryanjos/geo/issues/76
    [2] - http://geojson.org/geojson-spec.html#coordinate-reference-system-objects
    [3] - https://tools.ietf.org/html/rfc7946#appendix-B.1
    [4] - https://tools.ietf.org/html/rfc7946#section-4

    :param string:
        Iterable of WKB/EWKB bytes; consumed through a single iterator.
    :returns:
        The GeoJSON `dict` built by the importer registered for the geometry
        type, with `meta` and `crs` members added if an SRID was present.
    :raises ValueError:
        If the leading byte matches neither ``BIG_ENDIAN`` nor
        ``LITTLE_ENDIAN``.
    """  # noqa
    stream = iter(string)

    # First byte: byte-order marker.
    endianness = as_bin_str(take(1, stream))
    if endianness == BIG_ENDIAN:
        big_endian = True
    elif endianness == LITTLE_ENDIAN:
        big_endian = False
    else:
        # Hex-format the offending byte for the message. hexlify() wants a
        # bytes-like object, so only encode if we somehow ended up with text;
        # bytes have no .encode() on Python 3, and calling it would raise
        # AttributeError instead of the ValueError below.
        if isinstance(endianness, (bytes, bytearray)):
            bad_byte = endianness
        else:
            bad_byte = endianness.encode()
        raise ValueError("Invalid endian byte: '0x%s'. Expected 0x00 or 0x01"
                         % binascii.hexlify(bad_byte).decode())

    endian_token = '>' if big_endian else '<'

    # Next four bytes: geometry type code.
    type_bytes = as_bin_str(take(4, stream))
    if not big_endian:
        # To identify the type, order the type bytes in big endian:
        type_bytes = type_bytes[::-1]

    geom_type, type_bytes, has_srid = _get_geom_type(type_bytes)

    srid = None
    if has_srid:
        # A 4-byte integer SRID precedes the geometry payload.
        srid_field = as_bin_str(take(4, stream))
        [srid] = struct.unpack('%si' % endian_token, srid_field)

    importer = _loads_registry.get(geom_type)
    if importer is None:
        _unsupported_geom_type(geom_type)

    # The importer reads the payload from the same iterator, consuming only
    # the bytes that belong to this geometry.
    result = importer(big_endian, type_bytes, stream)

    if has_srid:
        # As mentioned in the docstring above, include both approaches to
        # indicating the SRID.
        result['meta'] = {'srid': int(srid)}
        result['crs'] = {
            'type': 'name',
            'properties': {
                'name': 'EPSG%s' % srid
            },
        }
    return result