def put_resource_data(resource_id):
    """Replace the binary payload of an existing resource.

    Looks up the resource record, overwrites its PostgreSQL large
    object with the request body, then refreshes mimetype, mtime and
    content hash on the record.  Returns an empty ``200`` response.

    :raises NotFound: if the resource does not exist.
    """
    # Default content type, honoring an explicit header when present.
    content_type = "application/octet-stream"
    if request.headers.get("Content-type"):
        content_type, _ = parse_header(request.headers["Content-type"])

    with db.cursor() as cur:
        cur.execute(
            """
            SELECT id, data_oid FROM "resource"
            WHERE id = %(id)s;
            """,
            dict(id=resource_id),
        )
        record = cur.fetchone()

    if record is None:
        raise NotFound()

    # todo: better use a streaming response here..?
    with db, db.cursor() as cur:
        # Rewrite the large object in place: truncate, then write.
        blob = db.lobject(oid=record["data_oid"], mode="wb")
        blob.seek(0)
        blob.truncate()
        blob.write(request.data)
        blob.close()

        new_hash = "sha1:" + hashlib.sha1(request.data).hexdigest()
        values = dict(
            id=resource_id,
            mimetype=content_type,
            mtime=datetime.datetime.utcnow(),
            hash=new_hash,
        )
        cur.execute(querybuilder.update("resource", values), values)

    return "", 200
def put_resource_data(resource_id):
    """Overwrite the stored data of resource ``resource_id``.

    The request body becomes the new large-object content; the record's
    mimetype, modification time and hash are updated to match.
    """
    if request.headers.get('Content-type'):
        content_type, _ = parse_header(request.headers['Content-type'])
    else:
        content_type = 'application/octet-stream'

    with db.cursor() as cur:
        cur.execute(
            """
            SELECT id, data_oid FROM "resource"
            WHERE id = %(id)s;
            """, dict(id=resource_id))
        row = cur.fetchone()

    if row is None:
        raise NotFound()

    # todo: better use a streaming response here..?
    with db, db.cursor() as cur:
        lobj = db.lobject(oid=row['data_oid'], mode='wb')
        lobj.seek(0)
        lobj.truncate()
        lobj.write(request.data)
        lobj.close()

        digest = hashlib.sha1(request.data).hexdigest()
        data = dict(
            id=resource_id,
            mimetype=content_type,
            mtime=datetime.datetime.utcnow(),
            hash='sha1:' + digest)
        cur.execute(querybuilder.update('resource', data), data)

    return '', 200
def on_dataset_create(dataset_id, dataset_conf):
    """Hook: seed a ``dummy_plugin`` row when a dataset is created."""
    query = """
    INSERT INTO dummy_plugin (dataset_id, foo, bar)
    VALUES (%s, %s, %s);
    """
    with db, db.cursor() as cur:
        cur.execute(query, (dataset_id, 'FOO data', 'BAR data'))
def on_dataset_update(dataset_id, dataset_conf):
    """Hook: mark the ``dummy_plugin`` row as updated for this dataset."""
    query = """
    UPDATE dummy_plugin SET foo='Updated FOO data' WHERE dataset_id=%s;
    """
    with db, db.cursor() as cur:
        cur.execute(query, (dataset_id,))
def get_dataset(dataset_id):
    """
    API view returning information for a single dataset.

    :HTTP url: ``/data/<dataset_id>``
    :HTTP methods: ``GET``

    **Example request:**

    .. code-block:: http

        GET /api/1/data/1 HTTP/1.0

    **Example response:**

    .. code-block:: http

        HTTP/1.0 200 OK
        Content-type: application/json

    .. code-block:: python

        {"id": 1, ...}
    """
    with db.cursor() as cur:
        cur.execute("SELECT id, configuration FROM dataset"
                    " WHERE id=%s;", (dataset_id,))
        row = cur.fetchone()

    if row is None:
        raise NotFound("Dataset not found: {0}".format(dataset_id))

    # Let plugins build the full metadata from the raw configuration.
    return _make_plugins_make_dataset_metadata(row['id'], row['configuration'])
def import_geo_dataset(dataset_id):
    """
    Task to import geographical resources from a dataset into
    a PostGIS table.

    :param dataset_id: Id of the dataset to import
    :raises NotFound: if the dataset does not exist.
    :raises ValueError: if the dataset is not geo-enabled or its
        configured importer is not supported.
    """
    with db.cursor() as cur:
        cur.execute("SELECT id, configuration FROM dataset"
                    " WHERE id=%s;", (dataset_id,))
        record = cur.fetchone()

    if record is None:
        # Should be logged as an error
        raise NotFound("Dataset not found: {0}".format(dataset_id))

    conf = record['configuration']
    if not conf.get('geo', {}).get('enabled', False):
        # Task was called from hook.. by error?
        raise ValueError("Requested import for non-geo-enabled dataset")

    importer = conf['geo']['importer']
    if importer != 'find_shapefiles':
        raise ValueError("Unsupported importer: {0}".format(importer))
    return import_dataset_find_shapefiles(dataset_id, conf)
def on_dataset_delete(dataset_id):
    """Hook: drop the ``dummy_plugin`` row of a deleted dataset."""
    with db, db.cursor() as cur:
        cur.execute(
            """
            DELETE FROM dummy_plugin WHERE dataset_id=%s;
            """, (dataset_id,))
def install(self):
    """Create the database table backing the dummy plugin."""
    ddl = """
    CREATE TABLE dummy_plugin (
        dataset_id INTEGER PRIMARY KEY,
        foo TEXT,
        bar TEXT);
    """
    with db, db.cursor() as cur:
        cur.execute(ddl)
def delete_dataset_configuration(dataset_id):
    """Delete a dataset record, then notify plugins via the
    ``dataset_delete`` hook.  Returns an empty ``200`` response."""
    with db, db.cursor() as cur:
        cur.execute(querybuilder.delete("dataset"), dict(id=dataset_id))
    current_app.plugins.call_hook("dataset_delete", dataset_id)
    return "", 200
def delete_dataset_configuration(dataset_id):
    """Remove dataset ``dataset_id`` and fire the ``dataset_delete`` hook."""
    query = querybuilder.delete('dataset')
    with db, db.cursor() as cur:
        cur.execute(query, dict(id=dataset_id))
    current_app.plugins.call_hook('dataset_delete', dataset_id)
    return '', 200
def _get_dataset_record(dataset_id):
    """Fetch the dataset row by primary key.

    :raises NotFound: if no dataset with this id exists.
    """
    with db.cursor() as cur:
        cur.execute(querybuilder.select_pk('dataset'), dict(id=dataset_id))
        record = cur.fetchone()
    if record is None:
        raise NotFound()
    return record
def on_dataset_create(dataset_id, dataset_conf):
    """Hook handler: insert the demo row for a newly created dataset."""
    params = (dataset_id, 'FOO data', 'BAR data')
    with db, db.cursor() as cur:
        cur.execute(
            """
            INSERT INTO dummy_plugin (dataset_id, foo, bar)
            VALUES (%s, %s, %s);
            """, params)
def _get_dataset_record(dataset_id):
    """Return the dataset row for ``dataset_id``, or raise ``NotFound``."""
    query = querybuilder.select_pk("dataset")
    with db.cursor() as cur:
        cur.execute(query, dict(id=dataset_id))
        row = cur.fetchone()
    if row is None:
        raise NotFound()
    return row
def on_dataset_update(dataset_id, dataset_conf):
    """Hook handler: flag the demo row as updated for this dataset."""
    with db, db.cursor() as cur:
        cur.execute(
            """
            UPDATE dummy_plugin SET foo='Updated FOO data'
            WHERE dataset_id=%s;
            """, (dataset_id,))
def put_resource_metadata(resource_id):
    """Replace the metadata JSON of an existing resource.

    :raises NotFound: if the resource does not exist.
    """
    new_metadata = _get_json_from_request()

    # Make sure the resource exists before attempting the update.
    with db.cursor() as cur:
        query = querybuilder.select_pk("resource", fields="id, metadata")
        cur.execute(query, dict(id=resource_id))
        if cur.fetchone() is None:
            raise NotFound("This resource does not exist")

    with db, db.cursor() as cur:
        data = dict(id=resource_id, metadata=json.dumps(new_metadata))
        cur.execute(querybuilder.update("resource", data), data)

    return "", 200
def put_resource_metadata(resource_id):
    """Store a new metadata document for resource ``resource_id``.

    The request body is parsed as JSON and written verbatim over the
    previous metadata; a missing resource yields a 404.
    """
    new_metadata = _get_json_from_request()

    check_query = querybuilder.select_pk('resource', fields='id, metadata')
    with db.cursor() as cur:
        cur.execute(check_query, dict(id=resource_id))
        existing = cur.fetchone()
    if existing is None:
        raise NotFound('This resource does not exist')

    update_values = dict(id=resource_id, metadata=json.dumps(new_metadata))
    update_query = querybuilder.update('resource', update_values)
    with db, db.cursor() as cur:
        cur.execute(update_query, update_values)

    return '', 200
def test_geo_import_shapefile(configured_app, data_dir):
    """Integration test for the ``find_shapefiles`` geo importer.

    Uploads a zip of shapefiles as a resource, creates a geo-enabled
    dataset referencing it, waits for the (async) import, and checks
    the resulting PostGIS table row count.
    """
    # ============================================================
    # - Create a resource containing a Zip file containing
    #   some shapefiles
    # - Create a dataset configured to import data from the
    #   resource as a geographical dataset
    # - Check that everything worked as expected (give it a
    #   little time to do stuff)
    # ============================================================

    apptc = configured_app.test_client()

    # Upload the zipped shapefiles as raw resource data.
    with open(str(data_dir.join('geodata/roads-folders.zip')), 'rb') as fp:
        payload = fp.read()
    resp = apptc.post('/api/1/admin/resource/',
                      headers={'Content-type': 'application/zip'},
                      data=payload)

    # Obtain the resource id (parsed out of the Location header)
    resource_url = resp.headers['Location']
    path = urlparse.urlparse(resource_url).path
    match = re.match('/api/1/admin/resource/([0-9]+)', path)
    resource_id = int(match.group(1))

    # Create geo dataset with this resource
    dataset_conf = {
        'metadata': {
            'title': 'Some Trentino roads from OpenStreetMap',
        },
        'resources': [
            'internal:///{0}'.format(resource_id),
        ],
        'geo': {
            'enabled': True,
            'importer': 'find_shapefiles',
        }
    }
    resp = apptc.post('/api/1/admin/dataset/',
                      headers={'Content-type': 'application/json'},
                      data=json.dumps(dataset_conf))

    # Obtain the dataset id (again from the Location header)
    path = urlparse.urlparse(resp.headers['Location']).path
    match = re.match('/api/1/admin/dataset/([0-9]+)', path)
    dataset_id = int(match.group(1))

    # NOTE(review): fixed sleep assumes the background import completes
    # within 3 seconds -- potentially flaky on slow machines.
    time.sleep(3)

    # ------------------------------------------------------------
    # Now, we're ready to check!
    # ------------------------------------------------------------

    # First, check that the table exists and it is populated
    with configured_app.app_context():
        with db, db.cursor() as cur:
            cur.execute("""SELECT * FROM "geodata_{0}";""".format(dataset_id))
            assert len(list(cur)) == 40  # 10 items, 4 shapefiles
def get_resource_metadata(resource_id):
    """Fetch and return the metadata field of a single resource.

    :raises NotFound: if the resource does not exist.
    """
    with db.cursor() as cur:
        cur.execute(querybuilder.select_pk('resource', fields='id, metadata'),
                    dict(id=resource_id))
        row = cur.fetchone()
    if row is None:
        raise NotFound()
    return row['metadata']
def get_resource_metadata(resource_id):
    """Return stored metadata for resource ``resource_id`` (404 if absent)."""
    query = querybuilder.select_pk("resource", fields="id, metadata")
    with db.cursor() as cur:
        cur.execute(query, dict(id=resource_id))
        record = cur.fetchone()
    if record is None:
        raise NotFound()
    return record["metadata"]
def delete_resource_data(resource_id):
    """Delete a resource: unlink its large object, then drop its record.

    :raises NotFound: if no resource with ``resource_id`` exists.
    """
    with db.cursor() as cur:
        cur.execute(
            """
            SELECT id, data_oid FROM "resource"
            WHERE id = %(id)s;
            """, dict(id=resource_id))
        resource = cur.fetchone()

    # Bugfix: previously a missing resource crashed with a TypeError
    # when subscripting None below; report 404 like the other views.
    if resource is None:
        raise NotFound()

    # todo: better use a streaming response here..?
    with db:
        # Remove the large object holding the payload.
        lobj = db.lobject(oid=resource['data_oid'], mode='wb')
        lobj.unlink()

    # Then, delete the metadata record.  The ``with db`` context
    # commits on exit, so no explicit commit is needed.
    with db, db.cursor() as cur:
        query = querybuilder.delete('resource')
        cur.execute(query, dict(id=resource_id))

    return '', 200
def delete_resource_data(resource_id):
    """Delete a resource's payload (large object) and its record.

    :raises NotFound: if no resource with ``resource_id`` exists.
    """
    with db.cursor() as cur:
        cur.execute(
            """
            SELECT id, data_oid FROM "resource"
            WHERE id = %(id)s;
            """,
            dict(id=resource_id),
        )
        resource = cur.fetchone()

    # Bugfix: guard against a missing resource -- the original code
    # subscripted None and raised TypeError instead of returning 404.
    if resource is None:
        raise NotFound()

    # todo: better use a streaming response here..?
    with db:
        # Remove the large object holding the payload.
        lobj = db.lobject(oid=resource["data_oid"], mode="wb")
        lobj.unlink()

    # Then, delete the metadata record.  The ``with db`` context
    # commits on exit, so the explicit commit was redundant.
    with db, db.cursor() as cur:
        query = querybuilder.delete("resource")
        cur.execute(query, dict(id=resource_id))

    return "", 200
def _resource_record(self):
    """Return the DB row for this resource.

    Fields: ``id``, ``mimetype``, ``mtime``, ``data_oid``.

    :raises ResourceNotFound: if the row does not exist.
    """
    query = """
    SELECT id, mimetype, mtime, data_oid
    FROM "resource"
    WHERE id = %(id)s;
    """
    with db, db.cursor() as cur:
        cur.execute(query, dict(id=self._resource_id))
        row = cur.fetchone()
    if row is None:
        raise ResourceNotFound(
            "The resource was not found in the database")
    return row
def _update_dataset_record(dataset_id, fields):
    """Update a dataset row and fire the ``dataset_update`` hook.

    ``fields['configuration']`` (if present) is JSON-encoded before the
    update; the original decoded value is what gets passed to the hook.
    """
    fields["id"] = dataset_id
    fields["mtime"] = datetime.datetime.utcnow()

    _configuration = None
    if "configuration" in fields:
        _configuration = fields["configuration"]
        fields["configuration"] = json.dumps(_configuration)

    query = querybuilder.update("dataset", fields)
    with db, db.cursor() as cur:
        cur.execute(query, fields)
    current_app.plugins.call_hook("dataset_update", dataset_id, _configuration)
def _update_dataset_record(dataset_id, fields):
    """Persist changed ``fields`` for a dataset, then notify plugins.

    The configuration (when supplied) is serialized to JSON for storage,
    while the hook receives the unserialized value.
    """
    fields['id'] = dataset_id

    if 'configuration' in fields:
        _configuration = fields['configuration']
        fields['configuration'] = json.dumps(_configuration)
    else:
        _configuration = None

    fields['mtime'] = datetime.datetime.utcnow()

    with db, db.cursor() as cur:
        cur.execute(querybuilder.update('dataset', fields), fields)
    current_app.plugins.call_hook('dataset_update', dataset_id, _configuration)
def get_resource_index():
    """Return a listing of all resources with their timestamps."""
    # todo: add paging support
    with db, db.cursor() as cur:
        cur.execute("""
        SELECT id, metadata, mimetype, mtime, ctime
        FROM resource ORDER BY id ASC
        """)
        rows = cur.fetchall()
    return [{'id': row['id'],
             'metadata': row['metadata'],
             'mimetype': row['mimetype'],
             'ctime': row['ctime'].strftime(DATE_FORMAT),
             'mtime': row['mtime'].strftime(DATE_FORMAT)}
            for row in rows]
def get_dataset_index():
    """Return a listing of all datasets with their timestamps."""
    # todo: add paging support
    with db.cursor() as cur:
        cur.execute("""
        SELECT id, configuration, ctime, mtime
        FROM dataset ORDER BY id ASC
        """)
        rows = cur.fetchall()
    return [{'id': row['id'],
             'configuration': row['configuration'],
             'ctime': row['ctime'].strftime(DATE_FORMAT),
             'mtime': row['mtime'].strftime(DATE_FORMAT)}
            for row in rows]
def _resource_record(self):
    """Look up this resource's row (id, mimetype, mtime, data_oid).

    :raises ResourceNotFound: when no matching row exists.
    """
    with db, db.cursor() as cur:
        cur.execute(
            """
            SELECT id, mimetype, mtime, data_oid
            FROM "resource"
            WHERE id = %(id)s;
            """, dict(id=self._resource_id))
        record = cur.fetchone()

    if record is not None:
        return record
    raise ResourceNotFound(
        "The resource was not found in the database")
def get_resource_index():
    """List every resource record, formatting timestamps for output."""
    # todo: add paging support
    with db, db.cursor() as cur:
        cur.execute(
            """
            SELECT id, metadata, mimetype, mtime, ctime
            FROM resource ORDER BY id ASC
            """
        )
        records = cur.fetchall()

    def _format(rec):
        # Normalize one DB row into the public dict shape.
        return {
            "id": rec["id"],
            "metadata": rec["metadata"],
            "mimetype": rec["mimetype"],
            "ctime": rec["ctime"].strftime(DATE_FORMAT),
            "mtime": rec["mtime"].strftime(DATE_FORMAT),
        }

    return [_format(rec) for rec in records]
def get_dataset_index():
    """List every dataset record, formatting timestamps for output."""
    # todo: add paging support
    with db.cursor() as cur:
        cur.execute(
            """
            SELECT id, configuration, ctime, mtime
            FROM dataset ORDER BY id ASC
            """
        )
        records = cur.fetchall()

    def _format(rec):
        # Normalize one DB row into the public dict shape.
        return {
            "id": rec["id"],
            "configuration": rec["configuration"],
            "ctime": rec["ctime"].strftime(DATE_FORMAT),
            "mtime": rec["mtime"].strftime(DATE_FORMAT),
        }

    return [_format(rec) for rec in records]
def get_dataset_index():
    """
    API view returning a (paged) list of datasets.

    :HTTP url: ``/data/``
    :HTTP methods: ``GET``

    The view returns a list of dictionaries representing dataset
    objects.  The schema is entirely up to the enabled plugins; the
    core implementation only guarantees that the ``id`` field is set
    to the correct dataset id.

    **Example request:**

    .. code-block:: http

        GET /api/1/data/ HTTP/1.0

    **Example response:**

    .. code-block:: http

        HTTP/1.0 200 OK
        Content-type: application/json
        Link: ?start=10&size=10; rel=next, ?start=50&size=10; rel=last
        X-page-total: 60
        X-page-start: 0
        X-page-size: 10

    .. code-block:: python

        [{"id": 1}, {"id": 2}, {"id": 3}, ..., {"id": 10}]
    """
    # todo: add paging support
    with db.cursor() as cur:
        cur.execute("""
        SELECT id, configuration FROM dataset ORDER BY id ASC
        """)
        rows = cur.fetchall()

    # Plugins assemble the public metadata from each raw configuration.
    return [_make_plugins_make_dataset_metadata(row['id'], row['configuration'])
            for row in rows]
def post_dataset_index():
    """Create a new dataset from the JSON request body.

    Inserts the configuration, fires the ``dataset_create`` hook and
    returns ``201 Created`` with the new dataset URL in ``Location``.
    """
    # Removed dead code: the Content-type header was parsed into a
    # ``content_type`` local that was never used (copy/paste from the
    # resource-upload view).
    data = _get_json_from_request()

    with db, db.cursor() as cur:
        cur.execute(
            """
            INSERT INTO "dataset" (configuration, ctime, mtime)
            VALUES (%(conf)s::json, %(mtime)s, %(mtime)s)
            RETURNING id;
            """,
            dict(conf=json.dumps(data), mtime=datetime.datetime.utcnow()))
        dataset_id = cur.fetchone()[0]

    current_app.plugins.call_hook('dataset_create', dataset_id, data)

    # Last, return 201 + Location: header
    location = url_for('.get_dataset_configuration', dataset_id=dataset_id)
    return '', 201, {'Location': location}
def _get_internal_resource_data(resource_id):
    """Get all data from an internally-stored resource.

    :raises NotFound: if the resource does not exist.
    """
    # todo: improve this function to avoid keeping the whole thing in memory
    # todo: also, make this more generic (and move from here)
    query = """
    SELECT id, mimetype, data_oid FROM "resource"
    WHERE id = %(id)s;
    """
    with db, db.cursor() as cur:
        cur.execute(query, dict(id=resource_id))
        row = cur.fetchone()

    if row is None:
        raise NotFound()

    with db:
        lobject = db.lobject(oid=row['data_oid'], mode='rb')
        data = lobject.read()
        lobject.close()

    return data
def post_resource_index():
    """
    We got some data to be stored as a new resource.

    The payload is written to a PostgreSQL large object, a metadata
    record is created, and ``201`` is returned with the resource URL
    in the ``Location`` header.
    """
    content_type = "application/octet-stream"
    if request.headers.get("Content-type"):
        content_type, _ = parse_header(request.headers["Content-type"])

    with db, db.cursor() as cur:
        # First, store the data in a PostgreSQL large object
        lobj = db.lobject(oid=0, mode="wb")
        oid = lobj.oid
        lobj.write(request.data)
        lobj.close()

        # Then, create a record for the metadata
        record = dict(
            metadata="{}",
            auto_metadata="{}",
            mimetype=content_type,
            data_oid=oid,
            ctime=datetime.datetime.utcnow(),
            mtime=datetime.datetime.utcnow(),
            hash="sha1:" + hashlib.sha1(request.data).hexdigest(),
        )
        cur.execute(querybuilder.insert("resource", record), record)
        resource_id = cur.fetchone()[0]

    # Last, return 201 + Location: header
    location = url_for(".get_resource_data", resource_id=resource_id)
    return "", 201, {"Location": location}
def post_dataset_index():
    """Create a dataset from the JSON body of the request.

    Stores the configuration, triggers the ``dataset_create`` plugin
    hook, and answers ``201`` with a ``Location`` header.
    """
    # Removed dead code: ``content_type`` was computed from the
    # Content-type header but never used anywhere in this view.
    data = _get_json_from_request()

    with db, db.cursor() as cur:
        cur.execute(
            """
            INSERT INTO "dataset" (configuration, ctime, mtime)
            VALUES (%(conf)s::json, %(mtime)s, %(mtime)s)
            RETURNING id;
            """,
            dict(conf=json.dumps(data), mtime=datetime.datetime.utcnow()),
        )
        dataset_id = cur.fetchone()[0]

    current_app.plugins.call_hook("dataset_create", dataset_id, data)

    # Last, return 201 + Location: header
    location = url_for(".get_dataset_configuration", dataset_id=dataset_id)
    return "", 201, {"Location": location}
def post_resource_index():
    """
    We got some data to be stored as a new resource.

    Then we want to return 201 + URL of the created resource in the
    Location: header.
    """
    if request.headers.get('Content-type'):
        content_type, _ = parse_header(request.headers['Content-type'])
    else:
        content_type = 'application/octet-stream'

    with db, db.cursor() as cur:
        # First, store the data in a PostgreSQL large object
        lobj = db.lobject(oid=0, mode='wb')
        oid = lobj.oid
        lobj.write(request.data)
        lobj.close()

        digest = hashlib.sha1(request.data).hexdigest()
        data = dict(
            metadata='{}',
            auto_metadata='{}',
            mimetype=content_type,
            data_oid=oid,
            ctime=datetime.datetime.utcnow(),
            mtime=datetime.datetime.utcnow(),
            hash='sha1:' + digest)

        # Then, create a record for the metadata
        cur.execute(querybuilder.insert('resource', data), data)
        resource_id = cur.fetchone()[0]

    # Last, return 201 + Location: header
    location = url_for('.get_resource_data', resource_id=resource_id)
    return '', 201, {'Location': location}
def uninstall(self):
    """Drop the table created by :meth:`install`."""
    with db, db.cursor() as cur:
        cur.execute("""
        DROP TABLE dummy_plugin;
        """)
def on_dataset_delete(dataset_id):
    """Hook handler: remove the demo row of the deleted dataset."""
    query = """
    DELETE FROM dummy_plugin WHERE dataset_id=%s;
    """
    with db, db.cursor() as cur:
        cur.execute(query, (dataset_id,))
def import_dataset_find_shapefiles(dataset_id, dataset_conf):
    """
    Find all the Shapefiles from archives listed as dataset resources.

    Each resource is downloaded to a temporary zip, scanned for
    shapefiles; for every shapefile a CREATE TABLE and an append-mode
    import SQL are generated via ``shp2pgsql``, then executed.

    NOTE(review): Python 2 only (``basestring``, ``dict.iteritems``).

    :param dataset_id: The dataset id
    :param dataset_conf: The dataset configuration
    """
    # All shapefiles are loaded into a single per-dataset table.
    destination_table = 'geodata_{0}'.format(dataset_id)
    create_table_sqls = []
    import_data_sqls = []

    with TemporaryDir() as tempdir:
        # First, copy zip files to temporary directory
        for resource in dataset_conf['resources']:
            # We assume the file is a zip, but we should double-check that!
            dest_file = os.path.join(tempdir, _random_file_name('zip'))

            # Plain-string resources are shorthand for {'url': ...}
            if isinstance(resource, basestring):
                resource = {'url': resource}

            # Copy the resource to disk
            _copy_resource_to_file(resource, dest_file)

            # Let's look for shapefiles inside that thing..
            found = find_shapefiles(dest_file)

            for basename, files in found.iteritems():
                if 'shp' not in files:
                    continue  # Bad match..

                # Export shapefiles to temporary files
                base_name = _random_file_name()
                for ext, item in files.iteritems():
                    dest = os.path.join(tempdir, base_name + '.' + ext)
                    with open(dest, 'wb') as fp:
                        # todo: copy file in chunks, not as a whole
                        fp.write(item.open().read())

                shp_full_path = os.path.join(tempdir, base_name + '.shp')

                # Schema-only SQL (CREATE TABLE + GiST index).
                create_table_sql = shp2pgsql(
                    shp_full_path, table=destination_table,
                    create_table_only=True, mode='create',
                    geometry_column='geom', create_gist_index=True)

                # Use TEXT fields instead of varchar(XX)
                # todo: use a less-hackish way!!
                create_table_sql = re.sub(
                    r'varchar\([0-9]+\)', 'text', create_table_sql,
                    flags=re.IGNORECASE)

                # Data-only SQL, appended into the shared table.
                import_data_sql = shp2pgsql(
                    shp_full_path, table=destination_table, mode='append',
                    geometry_column='geom', create_gist_index=False)

                create_table_sqls.append(create_table_sql)
                import_data_sqls.append(import_data_sql)

    # NOTE(review): only the first CREATE TABLE is executed -- this
    # assumes all found shapefiles share one schema, and raises
    # IndexError when no shapefiles were found.  Verify intent.
    with admin_db, admin_db.cursor() as cur:
        cur.execute(create_table_sqls[0])

    with db, db.cursor() as cur:
        for sql in import_data_sqls:
            cur.execute(sql)
def serve_resource(resource_id, transfer_block_size=4096):
    """
    Serve resource data via HTTP, setting ETag and Last-Modified
    headers and honoring ``If-None-Match`` and ``If-modified-since``
    headers.

    Currently supported features:

    - Set ``ETag`` header (to the hash of resource body)
    - Set ``Last-Modified`` header (to the last modification date)
    - Honor the ``If-modified-since`` header (if the resource was not
      modified, return 304)

    Planned features:

    - Return response as a stream, to avoid loading everything in memory.
    - Honor the ``If-Match`` / ``If-None-Match`` headers
    - Support ``Range`` requests + 206 partial response
    - Set ``Cache-control`` and ``Expire`` headers (?)
    - Properly support HEAD requests.

    :param resource_id: Id of the resource to be served
    :param transfer_block_size:
        Size of the streaming response size. Defaults to 4096 bytes.
    :return: A valid return value for a Flask view.
    """
    with db, db.cursor() as cur:
        query = querybuilder.select_pk(
            "resource", fields="id, mimetype, data_oid, mtime, hash")
        cur.execute(query, dict(id=resource_id))
        resource = cur.fetchone()

    if resource is None:
        raise NotFound()

    mimetype = resource["mimetype"] or "application/octet-stream"
    headers = {
        "Content-type": mimetype,
        "Last-modified": resource["mtime"].strftime(HTTP_DATE_FORMAT),
        "ETag": resource["hash"],
    }

    # ------------------------------------------------------------
    # Check the if-modified-since header
    if "if-modified-since" in request.headers:
        try:
            # Bugfix: ``datetime.strptime`` was called on the module
            # (the file elsewhere uses ``datetime.datetime.utcnow()``),
            # which raises AttributeError; use the class method.
            if_modified_since_date = datetime.datetime.strptime(
                request.headers["if-modified-since"], HTTP_DATE_FORMAT)
        except ValueError:  # narrowed from a bare ``except:``
            raise BadRequest("Invalid If-Modified-Since header value")

        if if_modified_since_date >= resource["mtime"]:
            # The resource was not modified -> return ``304 NOT MODIFIED``
            return Response("", status=304, headers=headers)

    # ------------------------------------------------------------
    # Stream the response data
    with db:
        lobject = db.lobject(oid=resource["data_oid"], mode="rb")
        data = lobject.read()
        lobject.close()

    return Response(data, status=200, headers=headers)
def serve_resource(resource_id, transfer_block_size=4096):
    """
    Serve resource data via HTTP, setting ETag and Last-Modified
    headers and honoring ``If-None-Match`` and ``If-modified-since``
    headers.

    Currently supported features:

    - Set ``ETag`` header (to the hash of resource body)
    - Set ``Last-Modified`` header (to the last modification date)
    - Honor the ``If-modified-since`` header (if the resource was not
      modified, return 304)

    Planned features:

    - Return response as a stream, to avoid loading everything in memory.
    - Honor the ``If-Match`` / ``If-None-Match`` headers
    - Support ``Range`` requests + 206 partial response
    - Set ``Cache-control`` and ``Expire`` headers (?)
    - Properly support HEAD requests.

    :param resource_id: Id of the resource to be served
    :param transfer_block_size:
        Size of the streaming response size. Defaults to 4096 bytes.
    :return: A valid return value for a Flask view.
    """
    with db, db.cursor() as cur:
        query = querybuilder.select_pk(
            'resource', fields='id, mimetype, data_oid, mtime, hash')
        cur.execute(query, dict(id=resource_id))
        resource = cur.fetchone()

    if resource is None:
        raise NotFound()

    mimetype = resource['mimetype'] or 'application/octet-stream'
    headers = {
        'Content-type': mimetype,
        'Last-modified': resource['mtime'].strftime(HTTP_DATE_FORMAT),
        'ETag': resource['hash'],
    }

    # ------------------------------------------------------------
    # Check the if-modified-since header
    if 'if-modified-since' in request.headers:
        try:
            # Bugfix: the module-level ``datetime`` has no ``strptime``
            # attribute (this file uses ``datetime.datetime.utcnow()``
            # elsewhere); call the class method instead.
            if_modified_since_date = datetime.datetime.strptime(
                request.headers['if-modified-since'], HTTP_DATE_FORMAT)
        except ValueError:  # narrowed from a bare ``except:``
            raise BadRequest("Invalid If-Modified-Since header value")

        if if_modified_since_date >= resource['mtime']:
            # The resource was not modified -> return ``304 NOT MODIFIED``
            return Response('', status=304, headers=headers)

    # ------------------------------------------------------------
    # Stream the response data
    with db:
        lobject = db.lobject(oid=resource['data_oid'], mode='rb')
        data = lobject.read()
        lobject.close()

    return Response(data, status=200, headers=headers)