def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.provider.elasticsearch_.ElasticsearchProvider
    """

    super().__init__(provider_def)

    # data is expected as '<scheme>://<host>[:<port>][/prefix]/<index>';
    # everything before the last '/' is the ES endpoint
    self.es_host, self.index_name = self.data.rsplit('/', 1)

    LOGGER.debug('Setting Elasticsearch properties')
    self.is_gdal = False

    LOGGER.debug('host: {}'.format(self.es_host))
    LOGGER.debug('index: {}'.format(self.index_name))

    self.type_name = 'FeatureCollection'
    self.url_parsed = urlparse(self.es_host)

    LOGGER.debug('Connecting to Elasticsearch')

    if self.url_parsed.port is None:  # proxy to default HTTP(S) port
        if self.url_parsed.scheme == 'https':
            port = 443
        else:
            port = 80
    else:  # was set explicitly
        port = self.url_parsed.port

    url_settings = {
        'scheme': self.url_parsed.scheme,
        'host': self.url_parsed.hostname,
        'port': port
    }

    # keep any URL path component as a prefix (e.g. ES behind a proxy)
    if self.url_parsed.path:
        url_settings['url_prefix'] = self.url_parsed.path

    LOGGER.debug('URL settings: {}'.format(url_settings))
    LOGGER.debug('Connecting to Elasticsearch')

    self.es = Elasticsearch([url_settings])
    if not self.es.ping():
        msg = 'Cannot connect to Elasticsearch'
        LOGGER.error(msg)
        raise ProviderConnectionError(msg)

    LOGGER.debug('Determining ES version')
    # first three characters of the version string, e.g. '7.1'
    v = self.es.info()['version']['number'][:3]
    if float(v) < 7:
        msg = 'only ES 7+ supported'
        LOGGER.error(msg)
        raise ProviderConnectionError(msg)

    LOGGER.debug('Grabbing field information')
    try:
        self.fields = self.get_fields()
    except exceptions.NotFoundError as err:
        LOGGER.error(err)
        raise ProviderQueryError(err)
def _open(self):
    """
    Open the OGR/GDAL connection for the configured source driver.

    Sets ``self.driver`` and ``self.conn``; paging is disabled
    immediately after a successful open.

    :raises ProviderConnectionError: on driver runtime errors
    """
    source_type = self.data_def['source_type']
    self.driver = self.ogr.GetDriverByName(source_type)
    if not self.driver:
        msg = 'No Driver for Source: {}'.format(source_type)
        LOGGER.error(msg)
        raise Exception(msg)
    if self.open_options:
        try:
            self.conn = self.gdal.OpenEx(
                self.data_def['source'],
                self.gdal.OF_VECTOR,
                open_options=self._list_open_options())
        except RuntimeError as err:
            LOGGER.error(err)
            raise ProviderConnectionError(err)
        except Exception:
            msg = 'Ignore errors during the connection for Driver \
{}'.format(source_type)
            LOGGER.error(msg)
            # best-effort retry with GDAL errors suppressed
            self.conn = _ignore_gdal_error(
                self.gdal, 'OpenEx', self.data_def['source'],
                self.gdal.OF_VECTOR,
                open_options=self._list_open_options())
    else:
        try:
            # second argument 0: open in read-only mode
            self.conn = self.driver.Open(self.data_def['source'], 0)
        except RuntimeError as err:
            LOGGER.error(err)
            raise ProviderConnectionError(err)
        except Exception:
            msg = 'Ignore errors during the connection for Driver \
{}'.format(source_type)
            LOGGER.error(msg)
            # ignore errors for ESRIJSON not having geometry member
            # see https://github.com/OSGeo/gdal/commit/38b0feed67f80ded32be6c508323d862e1a14474  # noqa
            self.conn = _ignore_gdal_error(
                self.driver, 'Open', self.data_def['source'], 0)
    if not self.conn:
        msg = 'Cannot open OGR Source: %s' % self.data_def['source']
        LOGGER.error(msg)
        raise Exception(msg)

    # Always need to disable paging immediately after Open!
    if self.source_capabilities['paging']:
        self.source_helper.disable_paging()
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.provider.xarray_.XarrayProvider
    """

    super().__init__(provider_def)

    try:
        # zarr stores need the dedicated opener; anything else goes
        # through the generic dataset reader
        opener = (xarray.open_zarr
                  if provider_def['data'].endswith('.zarr')
                  else xarray.open_dataset)

        dataset = opener(self.data)
        self._data = _convert_float32_to_float64(dataset)

        props = self._get_coverage_properties()
        self._coverage_properties = props
        self.axes = [props['x_axis_label'],
                     props['y_axis_label'],
                     props['time_axis_label']]
        self.fields = props['fields']
    except Exception as err:
        LOGGER.warning(err)
        raise ProviderConnectionError(err)
def __enter__(self):
    """
    Context-manager entry: connect to PostGIS; in 'query' context also
    cache the table's non-geometry column names and types.

    :returns: self

    :raises ProviderConnectionError: if the connection cannot be made
    """
    try:
        # optional list of schemas; only pass options when it differs
        # from the default ['public']
        search_path = self.conn_dic.pop('search_path', ['public'])
        if search_path != ['public']:
            self.conn_dic["options"] = '-c \
search_path={}'.format(",".join(search_path))
            LOGGER.debug('Using search path: {} '.format(search_path))
        self.conn = psycopg2.connect(**self.conn_dic)
        self.conn.set_client_encoding('utf8')
    except psycopg2.OperationalError:
        LOGGER.error("Couldn't connect to Postgis using:{}".format(
            str(self.conn_dic)))
        raise ProviderConnectionError()

    self.cur = self.conn.cursor()
    if self.context == 'query':
        # Getting columns
        # NOTE(review): table name is interpolated directly into SQL;
        # assumed to come from trusted provider configuration
        query_cols = "SELECT column_name, udt_name FROM information_schema.columns \
WHERE table_name = '{}' and udt_name != 'geometry';".format(
            self.table)

        self.cur.execute(query_cols)
        result = self.cur.fetchall()
        self.columns = SQL(', ').join(
            [Identifier(item[0]) for item in result])
        self.fields = dict(result)
    return self
def get_fields(self):
    """
    Get provider field information (names, types)

    :returns: dict of fields
    """

    field_map = {}
    try:
        defn = self._get_layer().GetLayerDefn()
        for idx in range(defn.GetFieldCount()):
            attr = defn.GetFieldDefn(idx)
            type_name = attr.GetFieldTypeName(attr.GetType())
            field_map[attr.GetName()] = type_name.lower()
    except RuntimeError as err:
        LOGGER.error(err)
        raise ProviderConnectionError(err)
    except Exception as err:
        # non-OGR failures are logged; whatever was collected is returned
        LOGGER.error(err)
    finally:
        self._close()

    return field_map
def get_metadata(self, dataset, server_url, layer=None,
                 tileset=None, tilejson=True, **kwargs):
    """
    Gets tile metadata

    :param dataset: dataset name
    :param server_url: server base url
    :param layer: mvt tile layer name
    :param tileset: mvt tileset name
    :param tilejson: `bool` for the returning json structure
                     if True it returns MapBox TileJSON 3.0
                     otherwise the raw JSON is served

    :returns: `dict` of JSON metadata
    """

    if is_url(self.data):
        url = urlparse(self.data)
        base_url = '{}://{}'.format(url.scheme, url.netloc)
        with requests.Session() as session:
            # prime the session against the base URL first,
            # presumably to establish cookies — TODO confirm
            session.get(base_url)
            resp = session.get('{base_url}/{lyr}/metadata.json'.format(
                base_url=base_url, lyr=layer))
            resp.raise_for_status()
            content = resp.json()
    else:
        if not isinstance(self.service_metadata_url, Path):
            msg = 'Wrong data path configuration: {}'.format(
                self.service_metadata_url)
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)
        with open(self.service_metadata_url, 'r') as md_file:
            content = json.loads(md_file.read())

    if tilejson:
        # rebuild the tiles URL template with OGC tile parameter names
        service_url = urljoin(
            server_url,
            'collections/{}/tiles/{}/{{{}}}/{{{}}}/{{{}}}{}'.format(
                dataset, tileset,
                'tileMatrix', 'tileRow', 'tileCol', '?f=mvt'))
        content = {
            "tilejson": "3.0.0",
            "name": content["name"],
            "tiles": service_url,
            "minzoom": content["minzoom"],
            "maxzoom": content["maxzoom"],
            "bounds": content["bounds"],
            "center": content["center"],
            "attribution": None,
            "description": None,
            "vector_layers": json.loads(content["json"])["vector_layers"]
        }
    else:
        # raw metadata: only parse the embedded JSON string
        content['json'] = json.loads(content['json'])

    return content
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.provider.xarray_.XarrayProvider
    """

    BaseProvider.__init__(self, provider_def)

    try:
        self._data = open_data(self.data)
        self._coverage_properties = self._get_coverage_properties()
        self.axes = [
            self._coverage_properties['x_axis_label'],
            self._coverage_properties['y_axis_label'],
            self._coverage_properties['time_axis_label'],
            'percentile'
        ]
        # datasets whose path mentions RCP expose an extra scenario axis
        if 'RCP' in self.data:
            self.axes.append('scenario')
        self.fields = self._coverage_properties['fields']
    except Exception as err:
        LOGGER.warning(err)
        raise ProviderConnectionError(err)
def __enter__(self):
    """
    Context-manager entry: connect to PostGIS, optionally scoping the
    search path to a configured schema; in 'query' context cache the
    table's non-geometry column names.

    :returns: self

    :raises ProviderConnectionError: if the connection cannot be made
    """
    try:
        # non-default schema becomes the connection search_path
        self.schema = self.conn_dic.pop('schema', None)
        if self.schema == 'public' or self.schema is None:
            pass
        else:
            self.conn_dic["options"] = '-c search_path={}'.format(
                self.schema)
            LOGGER.debug('Using schema {} as search path'.format(
                self.schema))
        self.conn = psycopg2.connect(**self.conn_dic)
    except psycopg2.OperationalError:
        LOGGER.error('Couldnt connect to Postgis using:{}'.format(
            str(self.conn_dic)))
        raise ProviderConnectionError()

    self.cur = self.conn.cursor()
    if self.context == 'query':
        # Getting columns
        # NOTE(review): table name is interpolated directly into SQL;
        # assumed to come from trusted provider configuration
        query_cols = "SELECT column_name FROM information_schema.columns \
WHERE table_name = '{}' and udt_name != 'geometry';".format(
            self.table)

        self.cur.execute(query_cols)
        result = self.cur.fetchall()
        self.columns = SQL(', ').join(
            [Identifier(item[0]) for item in result]
        )
    return self
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.providers.elasticsearch_.ElasticsearchProvider
    """

    BaseProvider.__init__(self, provider_def)

    # data is expected as a URL like <scheme>://<host>/<index>/<type>
    url_tokens = self.data.split('/')

    LOGGER.debug('Setting Elasticsearch properties')
    self.es_host = url_tokens[2]
    self.index_name = url_tokens[-2]
    self.type_name = url_tokens[-1]
    LOGGER.debug('host: {}'.format(self.es_host))
    LOGGER.debug('index: {}'.format(self.index_name))
    LOGGER.debug('type: {}'.format(self.type_name))

    LOGGER.debug('Connecting to Elasticsearch')
    self.es = Elasticsearch(self.es_host)
    if not self.es.ping():
        msg = 'Cannot connect to Elasticsearch'
        LOGGER.error(msg)
        raise ProviderConnectionError(msg)

    LOGGER.debug('Grabbing field information')
    try:
        self.fields = self.get_fields()
    except exceptions.NotFoundError as err:
        LOGGER.error(err)
        raise ProviderQueryError(err)
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.provider.tinydb_.TinyDBCatalogueProvider
    """

    # internal full-text index field, excluded from returned records
    self.excludes = [
        '_metadata-anytext',
    ]

    BaseProvider.__init__(self, provider_def)

    LOGGER.debug('Connecting to TinyDB db at {}'.format(self.data))

    if not os.path.exists(self.data):
        msg = 'TinyDB does not exist'
        LOGGER.error(msg)
        raise ProviderConnectionError(msg)

    self.db = TinyDB(self.data)

    self.fields = self.get_fields()
def get(self, identifier):
    """
    Get Feature by id

    :param identifier: feature id

    :returns: feature collection
    """
    feature_json = None
    try:
        LOGGER.debug('Fetching identifier {}'.format(identifier))
        source_layer = self._get_layer()

        # NOTE(review): identifier is interpolated into the OGR
        # attribute filter as-is; assumed validated upstream
        attr_filter = "{field} = '{id}'".format(
            field=self.id_field, id=identifier)
        source_layer.SetAttributeFilter(attr_filter)

        next_feature = self._get_next_feature(source_layer)
        feature_json = self._ogr_feature_to_json(next_feature)

    except RuntimeError as err:
        LOGGER.error(err)
        raise ProviderQueryError(err)
    except ProviderConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError(err)
    except Exception as err:
        LOGGER.error(err)
        raise ProviderGenericError(err)
    finally:
        self._close()

    return feature_json
def __load(self):
    """
    Private method for loading spatiallite,
    get the table structure and dump geometry

    :returns: sqlite3.Cursor

    :raises InvalidPluginError: if the file is missing or the table
        lacks the expected structure
    :raises ProviderConnectionError: if spatialite cannot be loaded
    """

    if not os.path.exists(self.data):
        raise InvalidPluginError

    conn = sqlite3.connect(self.data)

    try:
        conn.enable_load_extension(True)
    except AttributeError as err:
        LOGGER.error('Extension loading not enabled: {}'.format(err))
        raise ProviderConnectionError()

    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    try:
        cursor.execute("SELECT load_extension('mod_spatialite.so')")
        cursor.execute("PRAGMA table_info({})".format(self.table))
    except sqlite3.OperationalError as err:
        LOGGER.error('Extension loading error: {}'.format(err))
        raise ProviderConnectionError()

    result = cursor.fetchall()

    # explicit structural checks instead of `assert` (asserts are
    # stripped under `python -O`), raising InvalidPluginError as the
    # original TODO intended
    checks = (
        (len(result), "Table not found"),
        (len([item for item in result if item['pk'] == 1]),
         "Primary key not found"),
        (len([item for item in result if self.id_field in item]),
         "id_field not present"),
        (len([item for item in result if 'GEOMETRY' in item]),
         "GEOMETRY column not found"),
    )
    for ok, msg in checks:
        if not ok:
            LOGGER.error(msg)
            raise InvalidPluginError(msg)

    # SELECT list: all non-geometry columns plus GeoJSON-dumped geometry
    self.columns = [item[1] for item in result if item[1] != 'GEOMETRY']
    self.columns = ",".join(self.columns) + ",AsGeoJSON(geometry)"

    return cursor
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.provider.MVT.MVTProvider
    """

    super().__init__(provider_def)

    if is_url(self.data):
        url = urlparse(self.data)
        baseurl = '{}://{}'.format(url.scheme, url.netloc)
        param_type = '?f=mvt'
        # strip the trailing /{z}/{x}/{y} template and rebuild the
        # tiles path with OGC tile template parameter names
        servicepath = \
            '{}/tiles/{{{}}}/{{{}}}/{{{}}}/{{{}}}{}'.format(
                url.path.split('/{z}/{x}/{y}')[0],
                'tileMatrixSetId',
                'tileMatrix',
                'tileRow',
                'tileCol',
                param_type)

        self._service_url = url_join(baseurl, servicepath)

        # metadata endpoint lives next to the tile template root
        self._service_metadata_url = urljoin(
            self.service_url.split('{tileMatrix}/{tileRow}/{tileCol}')[0],
            'metadata')
    else:
        data_path = Path(self.data)
        if not data_path.exists():
            msg = 'Service does not exist: {}'.format(self.data)
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)
        self._service_url = data_path
        metadata_path = data_path.joinpath('metadata.json')
        if not metadata_path.exists():
            msg = 'Service metadata does not exist: {}'.format(
                metadata_path.name)
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)
        self._service_metadata_url = metadata_path
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.providers.elasticsearch_.ElasticsearchProvider
    """

    BaseProvider.__init__(self, provider_def)

    # data is expected as a URL like <scheme>://<host>/<index>
    url_tokens = self.data.split('/')

    LOGGER.debug('Setting Elasticsearch properties')
    self.es_host = url_tokens[2]
    self.index_name = url_tokens[-1]
    self.is_gdal = False

    LOGGER.debug('host: {}'.format(self.es_host))
    LOGGER.debug('index: {}'.format(self.index_name))

    LOGGER.debug('Connecting to Elasticsearch')
    self.es = Elasticsearch(self.es_host)
    if not self.es.ping():
        msg = 'Cannot connect to Elasticsearch'
        LOGGER.error(msg)
        raise ProviderConnectionError(msg)

    LOGGER.debug('Determining ES version')
    # first three characters of the version string, e.g. '7.1'
    v = self.es.info()['version']['number'][:3]
    if float(v) < 7:
        msg = 'only ES 7+ supported'
        LOGGER.error(msg)
        raise ProviderConnectionError(msg)

    LOGGER.debug('Grabbing field information')
    try:
        self.fields = self.get_fields()
    except exceptions.NotFoundError as err:
        LOGGER.error(err)
        raise ProviderQueryError(err)
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.provider.filesystem.FileSystemProvider
    """
    super().__init__(provider_def)

    # the configured data directory must already exist
    if os.path.exists(self.data):
        return

    msg = 'Directory does not exist: {}'.format(self.data)
    LOGGER.error(msg)
    raise ProviderConnectionError(msg)
def _request_json(self, url, params):
    """ Performs a GET request on `url` and returns the JSON response. """
    response = None
    try:
        response = requests.get(url, params)
        response.raise_for_status()
    except requests.HTTPError as err:
        LOGGER.error(err)
        # a falsy response (error status) reports the requested url
        target = response.url if response else url
        raise ProviderQueryError(f'failed to query {target}')
    except requests.ConnectionError as err:
        LOGGER.error(err)
        target = response.url if response else url
        raise ProviderConnectionError(f'failed to connect to {target}')

    return self._parse_json(response.text)
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.provider.cangrdrasterio.CanGRDProvider
    """

    super().__init__(provider_def)

    try:
        self.file_list = []

        # extract the RDPA cutoff token from the data path
        pattern = 'CMC_RDPA_{}cutoff'
        self.var = search(pattern, self.data)[0]
        self.get_file_list(self.var)

        # a wildcard path resolves to the most recent file in the list
        if '*' in self.data:
            self.data = self.file_list[-1]

        self._data = rasterio.open(self.data)
        self._coverage_properties = self._get_coverage_properties()
        self.axes = self._coverage_properties['axes']
        self.axes.append('time')

        # Rasterio does not read the crs and transform function
        # properly from the file, we have to set them manually
        # The CRS is the same for both RDPA resolutions;
        # the transform array differs for the 15 km and 10 km files
        self.crs = '+proj=stere +lat_0=90 +lat_ts=60 +lon_0=249 +x_0=0 +y_0=0 +R=6371229 +units=m +no_defs'  # noqa
        if '10km' in self.data:
            self.transform = (-4556441.403315245, 10000.0, 0.0,
                              920682.1411659503, 0.0, -10000.0)
        else:
            self.transform = (-2618155.4458640157, 15000.0, 0.0,
                              7508.80818105489, 0.0, -15000.0)

        self._data._crs = self.crs
        self._data._transform = self.transform
        self.num_bands = self._coverage_properties['num_bands']
        # fields are 1-based band numbers as strings
        self.fields = [str(num) for num in range(1, self.num_bands + 1)]
        self.native_format = provider_def['format']['name']

        # Needed to set the variable for each collection
        # We initialize the collection metadata through this function
        self.coverage = self.get_coverage_domainset()
    except Exception as err:
        LOGGER.warning(err)
        raise ProviderConnectionError(err)
def __enter__(self):
    """
    Context-manager entry: connect to PostGIS; in 'query' context also
    introspect column names/types via pg_catalog, excluding geometry
    and system (transaction ID) columns.

    :returns: self

    :raises ProviderConnectionError: if the connection cannot be made
    """
    try:
        # optional list of schemas; only pass options when it differs
        # from the default ['public']
        search_path = self.conn_dic.pop('search_path', ['public'])
        if search_path != ['public']:
            self.conn_dic["options"] = '-c \
search_path={}'.format(",".join(search_path))
            LOGGER.debug('Using search path: {} '.format(search_path))
        self.conn = psycopg2.connect(**self.conn_dic)
        self.conn.set_client_encoding('utf8')
    except psycopg2.OperationalError:
        LOGGER.error("Couldn't connect to Postgis using:{}".format(
            str(self.conn_dic)))
        raise ProviderConnectionError()

    self.cur = self.conn.cursor()
    if self.context == 'query':
        # Get table column names and types, excluding geometry and
        # transaction ID columns
        query_cols = "SELECT attr.attname, tp.typname \
FROM pg_catalog.pg_class as cls \
INNER JOIN pg_catalog.pg_attribute as attr \
ON cls.oid = attr.attrelid \
INNER JOIN pg_catalog.pg_type as tp \
ON tp.oid = attr.atttypid \
WHERE cls.relname = '{}' \
AND tp.typname != 'geometry' \
AND tp.typname != 'cid' \
AND tp.typname != 'oid' \
AND tp.typname != 'tid' \
AND tp.typname != 'xid';".format(
            self.table)

        self.cur.execute(query_cols)
        result = self.cur.fetchall()
        # optionally restrict to the configured properties list
        if self.properties:
            result = [res for res in result if res[0] in self.properties]
        self.columns = SQL(', ').join(
            [Identifier(item[0]) for item in result]
        )
        for k, v in dict(result).items():
            self.fields[k] = {'type': v}
    return self
def __enter__(self):
    """
    Context-manager entry: open a PostGIS connection, honouring an
    optional non-default search_path.

    :returns: self

    :raises ProviderConnectionError: if the connection cannot be made
    """
    try:
        # optional list of schemas; only pass options when it differs
        # from the default ['public']
        search_path = self.conn_dic.pop('search_path', ['public'])
        if search_path != ['public']:
            self.conn_dic['options'] = f'-c \
search_path={",".join(search_path)}'
            LOGGER.debug(f'Using search path: {search_path} ')
        self.conn = psycopg2.connect(**self.conn_dic)
    except psycopg2.OperationalError:
        LOGGER.error(
            f'Couldn\'t connect to Postgis using: {self.conn_dic!s}')
        raise ProviderConnectionError()

    self.cur = self.conn.cursor()
    return self
def get_tiles(self, layer=None, tileset=None, z=None, y=None, x=None,
              format_=None):
    """
    Gets tile

    :param layer: mvt tile layer
    :param tileset: mvt tileset
    :param z: z index
    :param y: y index
    :param x: x index
    :param format_: tile format

    :returns: an encoded mvt tile
    """
    # 'mvt' is an alias for the provider's configured format type
    if format_ == "mvt":
        format_ = self.format_type

    if is_url(self.data):
        url = urlparse(self.data)
        base_url = '{}://{}'.format(url.scheme, url.netloc)
        with requests.Session() as session:
            # prime the session against the base URL first,
            # presumably to establish cookies — TODO confirm
            session.get(base_url)
            resp = session.get('{base_url}/{lyr}/{z}/{y}/{x}.{f}'.format(
                base_url=base_url, lyr=layer, z=z, y=y, x=x, f=format_))
            resp.raise_for_status()
            return resp.content
    else:
        if not isinstance(self.service_url, Path):
            msg = 'Wrong data path configuration: {}'.format(
                self.service_url)
            LOGGER.error(msg)
            raise ProviderConnectionError(msg)
        else:
            try:
                with open(
                    self.service_url.joinpath('{z}/{y}/{x}.{f}'.format(
                        z=z, y=y, x=x, f=format_)), 'rb') as tile:
                    return tile.read()
            except FileNotFoundError as err:
                # missing tile file maps to a 404-style provider error
                raise ProviderTileNotFoundError(err)
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.providers.rasterio_.RasterioProvider
    """

    BaseProvider.__init__(self, provider_def)

    try:
        self._data = rasterio.open(self.data)
        self._coverage_properties = self._get_coverage_properties()
        self.axes = self._coverage_properties['axes']
        self.crs = self._coverage_properties['bbox_crs']
        self.num_bands = self._coverage_properties['num_bands']
    except Exception as err:
        LOGGER.warning(err)
        raise ProviderConnectionError(err)
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.provider.rasterio_.RasterioProvider
    """

    super().__init__(provider_def)

    try:
        self._data = rasterio.open(self.data)
        self._coverage_properties = self._get_coverage_properties()
        self.axes = self._coverage_properties['axes']
        self.crs = self._coverage_properties['bbox_crs']
        self.num_bands = self._coverage_properties['num_bands']
        # fields are 1-based band numbers as strings
        self.fields = [str(num) for num in range(1, self.num_bands + 1)]
        self.native_format = provider_def['format']['name']
    except Exception as err:
        LOGGER.warning(err)
        raise ProviderConnectionError(err)
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.provider.cansips_rasterio.CanSIPSProvider
    """

    super().__init__(provider_def)

    try:
        self.file_list = []
        self.member = []

        # temperature variable anchors the file listing; first match
        # becomes the reference dataset
        self.var = 'cansips_forecast_raw_latlon2.5x2.5_TMP_TGL_2m_'
        self.get_file_list(self.var)
        self.data = self.file_list[0]

        self.var_list = ['cansips_forecast_raw_latlon2.5x2.5_TMP_TGL_2m',
                         'cansips_forecast_raw_latlon2.5x2.5_HGT_ISBL_0500',  # noqa
                         'cansips_forecast_raw_latlon2.5x2.5_PRATE_SFC_0',
                         'cansips_forecast_raw_latlon2.5x2.5_PRMSL_MSL_0',
                         'cansips_forecast_raw_latlon2.5x2.5_TMP_ISBL_0850',  # noqa
                         'cansips_forecast_raw_latlon2.5x2.5_WTMP_SFC_0']

        self._data = rasterio.open(self.data)
        self._coverage_properties = self._get_coverage_properties()
        self.axes = self._coverage_properties['axes']
        self.axes.extend(['time', 'dim_reference_time', 'member'])
        self.num_bands = self._coverage_properties['num_bands']
        self.crs = self._coverage_properties['bbox_crs']
        # fields are 1-based band numbers as strings
        self.fields = [str(num) for num in range(1, self.num_bands+1)]
        self.native_format = provider_def['format']['name']

        # Needed to set the variable for each collection
        # We initialize the collection metadata through this function
        self.coverage = self.get_coverage_domainset()
    except Exception as err:
        LOGGER.warning(err)
        raise ProviderConnectionError(err)
def _request_json(self, url, params):
    """ Performs a GET request on `url` and returns the JSON response. """
    response = None

    if 'lang' not in params and self.locale:
        # Add language parameter, if missing (geoCore wants ISO 639-1 codes)  # noqa
        # NOTE: this mutates the caller's params dict in place
        LOGGER.debug(
            f"Requesting geoCore response in '{self.locale.language}'"
        )  # noqa
        params['lang'] = self.locale.language

    try:
        response = requests.get(url, params)
        response.raise_for_status()
    except requests.HTTPError as err:
        LOGGER.error(err)
        raise ProviderQueryError(
            f'failed to query {response.url if response else url}')
    except requests.ConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError(
            f'failed to connect to {response.url if response else url}')

    return self._parse_json(response.text)
def __init__(self, provider_def):
    """
    Initialize object

    :param provider_def: provider definition

    :returns: pygeoapi.provider.cangrdrasterio.CanGRDProvider
    """

    super().__init__(provider_def)

    try:
        self._data = rasterio.open(self.data)
        self._coverage_properties = self._get_coverage_properties()
        self.axes = self._coverage_properties['axes']
        # seasonal datasets expose an extra season axis
        if 'season' in self.data:
            self.axes.append('season')
        self.crs = self._coverage_properties['bbox_crs']
        self.num_bands = self._coverage_properties['num_bands']
        # list of variables are not in metadata
        # we need to have them in the code
        self.fields = ['tmean', 'tmax', 'tmin', 'pcp']
        self.native_format = provider_def['format']['name']
    except Exception as err:
        LOGGER.warning(err)
        raise ProviderConnectionError(err)
def handler(self, err_level, err_num, err_msg):
    """
    Define custom GDAL error handler function

    Records the last GDAL error on the instance and raises a provider
    error for failure-level conditions.

    :param err_level: error level
    :param err_num: internal gdal error number
    :param err_msg: error message

    :raises ProviderConnectionError: on HTTP 5xx errors reported by GDAL
    :raises ProviderGenericError: on other failure-level errors
    """

    err_type = {
        osgeo_gdal.CE_None: 'None',
        osgeo_gdal.CE_Debug: 'Debug',
        osgeo_gdal.CE_Warning: 'Warning',
        osgeo_gdal.CE_Failure: 'Failure',
        osgeo_gdal.CE_Fatal: 'Fatal'
    }

    err_msg = err_msg.replace('\n', ' ')
    level = err_type.get(err_level, 'None')

    self.err_level = err_level
    self.err_num = err_num
    self.err_msg = err_msg

    LOGGER.error('Error Number: %s, Type: %s, Msg: %s'
                 % (self.err_num, level, self.err_msg))

    last_error = osgeo_gdal.GetLastErrorMsg()
    if self.err_level >= osgeo_gdal.CE_Failure:
        if 'HTTP error code' in last_error:
            # 500 <= http error code <= 599
            # range upper bound is exclusive, so 600 is needed to
            # include 599 (the original range(500, 599) missed it)
            if any(str(code) in last_error
                   for code in range(500, 600)):
                raise ProviderConnectionError(last_error)
        else:
            raise ProviderGenericError(last_error)
def query(self, startindex=0, limit=10, resulttype='results',
          bbox=[], datetime_=None, properties=[], sortby=[],
          select_properties=[], skip_geometry=False, q=None,
          filterq=None, **kwargs):
    """
    query Elasticsearch index

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param properties: list of tuples (name, value)
    :param sortby: list of dicts (property, order)
    :param select_properties: list of property names
    :param skip_geometry: bool of whether to skip geometry (default False)
    :param q: full-text search term(s)
    :param filterq: filter object

    :returns: dict of 0..n GeoJSON features
    """

    query = {'track_total_hits': True, 'query': {'bool': {'filter': []}}}
    filter_ = []

    feature_collection = {'type': 'FeatureCollection', 'features': []}

    if resulttype == 'hits':
        LOGGER.debug('hits only specified')
        limit = 0

    if bbox:
        LOGGER.debug('processing bbox parameter')
        minx, miny, maxx, maxy = bbox
        # ES envelope: [top-left, bottom-right] coordinate pairs
        bbox_filter = {
            'geo_shape': {
                'geometry': {
                    'shape': {
                        'type': 'envelope',
                        'coordinates': [[minx, maxy], [maxx, miny]]
                    },
                    'relation': 'intersects'
                }
            }
        }

        query['query']['bool']['filter'].append(bbox_filter)

    if datetime_ is not None:
        LOGGER.debug('processing datetime parameter')
        if self.time_field is None:
            LOGGER.error('time_field not enabled for collection')
            raise ProviderQueryError()

        time_field = self.mask_prop(self.time_field)

        if '/' in datetime_:  # envelope
            LOGGER.debug('detected time range')
            time_begin, time_end = datetime_.split('/')

            range_ = {
                'range': {
                    time_field: {
                        'gte': time_begin,
                        'lte': time_end
                    }
                }
            }
            # '..' denotes an open-ended interval bound
            if time_begin == '..':
                range_['range'][time_field].pop('gte')
            elif time_end == '..':
                range_['range'][time_field].pop('lte')

            filter_.append(range_)

        else:  # time instant
            LOGGER.debug('detected time instant')
            filter_.append({'match': {time_field: datetime_}})

        LOGGER.debug(filter_)
        query['query']['bool']['filter'].append(*filter_)

    if properties:
        LOGGER.debug('processing properties')
        for prop in properties:
            prop_name = self.mask_prop(prop[0])
            pf = {'match': {prop_name: {'query': prop[1]}}}
            query['query']['bool']['filter'].append(pf)
            # exact match unless the value carries '|'-separated options
            if '|' not in prop[1]:
                pf['match'][prop_name]['minimum_should_match'] = '100%'

    if sortby:
        LOGGER.debug('processing sortby')
        query['sort'] = []
        for sort in sortby:
            LOGGER.debug('processing sort object: {}'.format(sort))

            sp = sort['property']

            # string fields sort on the not-analyzed .raw sub-field
            if self.fields[sp]['type'] == 'string':
                LOGGER.debug('setting ES .raw on property')
                sort_property = '{}.raw'.format(self.mask_prop(sp))
            else:
                sort_property = self.mask_prop(sp)

            sort_order = 'asc'
            if sort['order'] == '-':
                sort_order = 'desc'

            sort_ = {sort_property: {'order': sort_order}}
            query['sort'].append(sort_)

    if q is not None:
        LOGGER.debug('Adding free-text search')
        query['query']['bool']['must'] = {'query_string': {'query': q}}

        # keep internal metadata payload fields out of the response
        query['_source'] = {
            'excludes': [
                'properties._metadata-payload',
                'properties._metadata-schema',
                'properties._metadata-format'
            ]
        }

    if self.properties or select_properties:
        LOGGER.debug('including specified fields: {}'.format(
            self.properties))
        query['_source'] = {
            'includes': list(map(self.mask_prop, set(self.properties) | set(select_properties)))  # noqa
        }
        query['_source']['includes'].append(self.mask_prop(self.id_field))
        query['_source']['includes'].append('type')
        query['_source']['includes'].append('geometry')

    if skip_geometry:
        LOGGER.debug(
            'limiting to specified fields: {}'.format(select_properties))
        try:
            query['_source']['excludes'] = ['geometry']
        except KeyError:
            # no _source entry yet; create one with only the exclusion
            query['_source'] = {'excludes': ['geometry']}

    try:
        LOGGER.debug('querying Elasticsearch')
        if filterq:
            LOGGER.debug('adding cql object: {}'.format(filterq.json()))
            query = update_query(input_query=query, cql=filterq)
        LOGGER.debug(json.dumps(query, indent=4))

        LOGGER.debug('Setting ES paging zero-based')
        if startindex > 0:
            startindex2 = startindex - 1
        else:
            startindex2 = startindex

        # ES from_+size is capped at 10000; beyond that, use the scan
        # helper and skip past the offset manually
        if startindex2 + limit > 10000:
            gen = helpers.scan(client=self.es, query=query,
                               preserve_order=True,
                               index=self.index_name)
            results = {'hits': {'total': limit, 'hits': []}}
            for i in range(startindex2 + limit):
                try:
                    if i >= startindex2:
                        results['hits']['hits'].append(next(gen))
                    else:
                        next(gen)
                except StopIteration:
                    break
            results['hits']['total'] = \
                len(results['hits']['hits']) + startindex2
        else:
            results = self.es.search(index=self.index_name,
                                     from_=startindex2, size=limit,
                                     body=query)
            # flatten the ES 7 total object into a plain count
            results['hits']['total'] = results['hits']['total']['value']
    except exceptions.ConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError()
    except exceptions.RequestError as err:
        LOGGER.error(err)
        raise ProviderQueryError()
    except exceptions.NotFoundError as err:
        LOGGER.error(err)
        raise ProviderQueryError()

    feature_collection['numberMatched'] = results['hits']['total']

    if resulttype == 'hits':
        return feature_collection

    feature_collection['numberReturned'] = len(results['hits']['hits'])

    LOGGER.debug('serializing features')
    for feature in results['hits']['hits']:
        feature_ = self.esdoc2geojson(feature)
        feature_collection['features'].append(feature_)

    return feature_collection
def __load(self):
    """
    Private method for loading spatiallite,
    get the table structure and dump geometry

    Detects GeoPackage files by their SQLite application_id and enables
    GPKG virtual-table support when found.

    :returns: sqlite3.Cursor
    """
    if (os.path.exists(self.data)):
        conn = sqlite3.connect(self.data)
    else:
        LOGGER.error('Path to sqlite does not exist')
        raise InvalidPluginError()

    try:
        conn.enable_load_extension(True)
    except AttributeError as err:
        LOGGER.error('Extension loading not enabled: {}'.format(err))
        raise ProviderConnectionError()

    conn.row_factory = sqlite3.Row
    conn.enable_load_extension(True)
    # conn.set_trace_callback(LOGGER.debug)
    cursor = conn.cursor()
    try:
        cursor.execute(f"SELECT load_extension('{SPATIALITE_EXTENSION}')")
    except sqlite3.OperationalError as err:
        LOGGER.error('Extension loading error: {}'.format(err))
        raise ProviderConnectionError()
    result = cursor.fetchall()

    # Checking for geopackage
    # application_id magic numbers per the GeoPackage specification
    cursor.execute("PRAGMA application_id")
    result = cursor.fetchone()
    self.application_id = result["application_id"]
    if self.application_id == 1196444487:
        LOGGER.info("Detected GPKG 1.2 and greater")
    elif self.application_id == 1196437808:
        LOGGER.info("Detected GPKG 1.0 or 1.1")
    else:
        LOGGER.info("No GPKG detected assuming spatial sqlite3")
        self.application_id = 0

    if self.application_id:
        # enable spatialite's GPKG virtual-table wrapper
        cursor.execute("SELECT AutoGPKGStart()")
        result = cursor.fetchall()
        if result[0][0] == 1:
            LOGGER.info("Loaded Geopackage support")
        else:
            LOGGER.info("SELECT AutoGPKGStart() returned 0."
                        + "Detected GPKG but couldnt load support")
            raise InvalidPluginError

    # geometry column name differs between GPKG and spatialite tables
    if self.application_id:
        self.geom_col = "geom"
    else:
        self.geom_col = "geometry"

    try:
        cursor.execute('PRAGMA table_info({})'.format(self.table))
        result = cursor.fetchall()
    except sqlite3.OperationalError:
        LOGGER.error('Couldnt find table: {}'.format(self.table))
        raise ProviderConnectionError()

    try:
        assert len(result), 'Table not found'
        assert len([item for item in result
                    if self.id_field in item]), 'id_field not present'
    except AssertionError:
        raise InvalidPluginError

    # SELECT list: all non-geometry columns plus GeoJSON-dumped geometry
    self.columns = [item[1] for item in result
                    if item[1] != self.geom_col]
    self.columns = ','.join(self.columns) + ',AsGeoJSON({})'.format(
        self.geom_col)

    # GPKG tables are accessed through their vgpkg_ virtual table
    if self.application_id:
        self.table = "vgpkg_{}".format(self.table)

    return cursor
def query(self, startindex=0, limit=10, resulttype='results',
          bbox=[], datetime_=None, properties=[], sortby=[],
          select_properties=[], skip_geometry=False):
    """
    Query OGR source

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime_: temporal (datestamp or extent)
    :param properties: list of tuples (name, value)
    :param sortby: list of dicts (property, order)
    :param select_properties: list of property names
    :param skip_geometry: bool of whether to skip geometry (default False)

    :returns: dict of 0..n GeoJSON features

    :raises ProviderQueryError: OGR runtime failure
    :raises ProviderConnectionError: connection failure
    :raises ProviderGenericError: any other failure
    """
    result = None
    try:
        if self.source_capabilities['paging']:
            self.source_helper.enable_paging(startindex, limit)

        layer = self._get_layer()

        if bbox:
            LOGGER.debug('processing bbox parameter')
            minx, miny, maxx, maxy = bbox

            wkt = "POLYGON (({minx} {miny},{minx} {maxy},{maxx} {maxy}," \
                  "{maxx} {miny},{minx} {miny}))".format(
                      minx=float(minx), miny=float(miny),
                      maxx=float(maxx), maxy=float(maxy))

            polygon = self.ogr.CreateGeometryFromWkt(wkt)
            if self.transform_in:
                polygon.Transform(self.transform_in)

            layer.SetSpatialFilter(polygon)

            # layer.SetSpatialFilterRect(
            # float(minx), float(miny), float(maxx), float(maxy))

        if properties:
            LOGGER.debug('processing properties')

            # values originate from untrusted HTTP query parameters;
            # double any embedded single quote so the OGR attribute
            # filter (SQL WHERE dialect) stays well-formed and cannot
            # be broken out of
            attribute_filter = ' and '.join(
                "{} = '{}'".format(
                    name, str(value).replace("'", "''"))
                for name, value in properties)

            LOGGER.debug(attribute_filter)

            layer.SetAttributeFilter(attribute_filter)

        # Make response based on resulttype specified
        if resulttype == 'hits':
            LOGGER.debug('hits only specified')
            result = self._response_feature_hits(layer)
        elif resulttype == 'results':
            LOGGER.debug('results specified')
            result = self._response_feature_collection(layer, limit)
        else:
            LOGGER.error('Invalid resulttype: %s' % resulttype)

    except RuntimeError as err:
        LOGGER.error(err)
        raise ProviderQueryError(err)
    except ProviderConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError(err)
    except Exception as err:
        LOGGER.error(err)
        raise ProviderGenericError(err)

    finally:
        self._close()

    return result
def query(self, startindex=0, limit=10, resulttype='results',
          bbox=[], datetime=None, properties=[], sortby=[]):
    """
    query Elasticsearch index

    :param startindex: starting record to return (default 0)
    :param limit: number of records to return (default 10)
    :param resulttype: return results or hit limit (default results)
    :param bbox: bounding box [minx,miny,maxx,maxy]
    :param datetime: temporal (datestamp or extent); NOTE: the name is
                     kept for caller compatibility although it shadows
                     the stdlib datetime module inside this method
    :param properties: list of tuples (name, value)
    :param sortby: list of dicts (property, order)

    :returns: dict of 0..n GeoJSON features

    :raises ProviderConnectionError: Elasticsearch connection failure
    :raises ProviderQueryError: malformed query / missing index or field
    """

    query = {'query': {'bool': {'filter': []}}}
    filter_ = []

    feature_collection = {
        'type': 'FeatureCollection',
        'features': []
    }

    if resulttype == 'hits':
        LOGGER.debug('hits only specified')
        limit = 0

    if bbox:
        LOGGER.debug('processing bbox parameter')
        minx, miny, maxx, maxy = bbox
        bbox_filter = {
            'geo_shape': {
                'geometry': {
                    'shape': {
                        'type': 'envelope',
                        'coordinates': [[minx, miny], [maxx, maxy]]
                    },
                    'relation': 'intersects'
                }
            }
        }

        query['query']['bool']['filter'].append(bbox_filter)

    if datetime is not None:
        LOGGER.debug('processing datetime parameter')
        if self.time_field is None:
            LOGGER.error('time_field not enabled for collection')
            raise ProviderQueryError()

        time_field = 'properties.{}'.format(self.time_field)

        if '/' in datetime:  # envelope
            LOGGER.debug('detected time range')
            time_begin, time_end = datetime.split('/')

            range_ = {
                'range': {
                    time_field: {
                        'gte': time_begin,
                        'lte': time_end
                    }
                }
            }
            # open-ended intervals drop the unbounded side
            if time_begin == '..':
                range_['range'][time_field].pop('gte')
            elif time_end == '..':
                range_['range'][time_field].pop('lte')

            filter_.append(range_)

        else:  # time instant
            LOGGER.debug('detected time instant')
            filter_.append({'match': {time_field: datetime}})

        LOGGER.debug(filter_)
        # extend, not append: the bool filter clause must be a flat
        # list of query objects; appending the list itself nests an
        # array which the ES bool-query parser rejects
        query['query']['bool']['filter'].extend(filter_)

    if properties:
        LOGGER.debug('processing properties')
        for prop in properties:
            pf = {'match': {'properties.{}'.format(prop[0]): prop[1]}}
            query['query']['bool']['filter'].append(pf)

    if sortby:
        LOGGER.debug('processing sortby')
        query['sort'] = []
        for sort in sortby:
            LOGGER.debug('processing sort object: {}'.format(sort))

            sp = sort['property']

            if self.fields[sp]['type'] == 'string':
                # text fields must sort on their keyword sub-field
                LOGGER.debug('setting ES .raw on property')
                sort_property = 'properties.{}.raw'.format(sp)
            else:
                sort_property = 'properties.{}'.format(sp)

            sort_order = 'asc'
            if sort['order'] == 'D':
                sort_order = 'desc'

            sort_ = {sort_property: {'order': sort_order}}
            query['sort'].append(sort_)

    if self.properties:
        LOGGER.debug('including specified fields: {}'.format(
            self.properties))
        query['_source'] = {
            'includes': list(map('properties.{}'.format, self.properties))
        }
        # always keep id, type and geometry for GeoJSON serialization
        query['_source']['includes'].append('properties.{}'.format(
            self.id_field))
        query['_source']['includes'].append('type')
        query['_source']['includes'].append('geometry')
    try:
        LOGGER.debug('querying Elasticsearch')
        if startindex + limit > 10000:
            # beyond the ES from/size window; fall back to a scan
            gen = helpers.scan(client=self.es, query=query,
                               preserve_order=True,
                               index=self.index_name)
            results = {'hits': {'total': limit, 'hits': []}}
            for i in range(startindex + limit):
                try:
                    if i >= startindex:
                        results['hits']['hits'].append(next(gen))
                    else:
                        next(gen)
                except StopIteration:
                    break
            results['hits']['total'] = \
                len(results['hits']['hits']) + startindex
        else:
            results = self.es.search(index=self.index_name,
                                     from_=startindex, size=limit,
                                     body=query)
    except exceptions.ConnectionError as err:
        LOGGER.error(err)
        raise ProviderConnectionError()
    except exceptions.RequestError as err:
        LOGGER.error(err)
        raise ProviderQueryError()
    except exceptions.NotFoundError as err:
        LOGGER.error(err)
        raise ProviderQueryError()

    # ES 7+ returns hits.total as {'value': n, 'relation': ...} from
    # search(); the scan branch above sets a plain int — normalize so
    # numberMatched is always an integer
    matched = results['hits']['total']
    if isinstance(matched, dict):
        matched = matched['value']
    feature_collection['numberMatched'] = matched

    if resulttype == 'hits':
        return feature_collection

    feature_collection['numberReturned'] = len(results['hits']['hits'])

    LOGGER.debug('serializing features')
    for feature in results['hits']['hits']:
        id_ = feature['_source']['properties'][self.id_field]
        LOGGER.debug('serializing id {}'.format(id_))
        feature['_source']['id'] = id_
        if self.properties:
            # thin the feature down to the configured property list
            feature_thinned = {
                'id': feature['_source']['properties'][self.id_field],
                'type': feature['_source']['type'],
                'geometry': feature['_source']['geometry'],
                'properties': OrderedDict()
            }
            for p in self.properties:
                try:
                    feature_thinned['properties'][p] = \
                        feature['_source']['properties'][p]
                except KeyError as err:
                    LOGGER.error(err)
                    raise ProviderQueryError()

            feature_collection['features'].append(feature_thinned)
        else:
            feature_collection['features'].append(feature['_source'])

    return feature_collection