def get_images(info, limit, offset):
    """
    Returns the signed image URLs for a sample.

    :param info: the sample's info dict. When it has an 'image' entry, that
        entry's 'url', 'type' (and, for '3D', 'count') fields are read;
        otherwise info['data']['url'] is used.
    :param limit: maximum number of 3D slices to return, or None for no
        limit. Ignored for every other kind of sample.
    :param offset: index of the first 3D slice to return
    :return: a list of signed URLs; empty when the sample has no 'image'
        entry and its data URL is not a .png/.jpg/.jpeg file
    :raises ValueError: if info['image']['type'] is neither '2D' nor '3D'
    """
    if 'image' not in info:
        # No generated images: the raw data is served directly, but only
        # when it is itself an image file.
        data_url = info['data']['url']
        if not data_url.lower().endswith(('.png', '.jpg', '.jpeg')):
            return []
        data_client = storage.get_storage_client(data_url)
        return [data_client.get_signed_url(data_url)]

    image_info: Dict = info['image']
    prefix = image_info['url']
    kind = image_info['type']
    client = storage.get_storage_client(prefix)

    if kind == '2D':
        return [client.get_signed_url(f'{prefix}.jpg')]
    if kind != '3D':
        raise ValueError(f"Unsupported image_type '{kind}'")

    # 3D samples have one image per slice, named '<prefix>-<index>.jpg'.
    total = image_info['count']
    stop = total if limit is None else min(total, offset + limit)
    return [
        client.get_signed_url(f'{prefix}-{index}.jpg')
        for index in range(offset, stop)
    ]
def test_get_images_filesystem():
    """
    get_images on a 3D sample stored under file:// returns download-route
    URLs: 10 of them when asking for 10 images from offset 2 of 12 slices.
    """
    image_type = '3D'
    image_url = 'file://blueno/3d-array'
    data_url = 'file://blueno/3d-array.npy'

    # Serialize a 12-slice array and store it as the sample's data.
    volume = numpy.zeros((12, 5, 3))
    buffer = io.BytesIO()
    numpy.save(buffer, volume)
    buffer.seek(0)
    storage.get_storage_client(data_url).put(data_url, buffer)

    image._create_images(image_type, image_url, data_url)

    info = {
        'image': {
            'url': image_url,
            'type': image_type,
            'count': volume.shape[0],
        }
    }
    # The expected URLs are built from the request host, so a request
    # context is set up around the call.
    with app.test_request_context('http://www.example.com/rest/of/the/route'):
        urls = image.get_images(info, 10, 2)
        assert len(urls) == 10
        first_url = urls[0]
        assert first_url.startswith(
            'http://www.example.com/data/download?url=file://blueno/3d-array')
def test_get_images_azure1():
    """
    get_images on a 2D sample stored under az:// returns a single URL that
    points at the '<prefix>.jpg' blob.
    """
    image_type = '2D'
    image_url = 'az://blueno2/2d-array'
    data_url = 'az://blueno2/2d-array.npy'

    # Serialize the array and store it as the sample's data.
    pixels = numpy.zeros((120, 5, 3))
    buffer = io.BytesIO()
    numpy.save(buffer, pixels)
    buffer.seek(0)
    storage.get_storage_client(data_url).put(data_url, buffer)

    image._create_images(image_type, image_url, data_url)

    info = {
        'image': {
            'url': image_url,
            'type': image_type,
            'count': pixels.shape[0],
        }
    }
    urls = image.get_images(info, 1, 0)

    assert len(urls) == 1
    only_url = urls[0]
    assert only_url.startswith(
        'https://blueno.blob.core.windows.net/blueno2/2d-array.jpg')
def download_object():
    """
    Streams the object stored at the 'url' query parameter back to the
    caller as an 'application/octet-stream' attachment.

    :return: the file response on success, or a (JSON message, 400) pair
        when the 'url' query parameter is missing or no storage client
        accepts the URL
    """
    object_url = flask.request.args.get('url', None)
    # Reject a missing url up front, mirroring upload_object, instead of
    # handing None to the storage layer.
    if object_url is None:
        return flask.jsonify({'message': "No url query param found"}), 400

    # TODO: Implement signed URLs so that we can secure this endpoint
    token = flask.request.args.get('token', None)  # noqa: F841

    try:
        storage_client = storage.get_storage_client(object_url)
    except ValueError as e:
        return flask.jsonify({'message': str(e)}), 400

    stream = storage_client.get(object_url)
    # NOTE(review): Flask 2.0 renamed `attachment_filename` to
    # `download_name` -- confirm the pinned Flask version before upgrading.
    return flask.send_file(stream,
                           mimetype='application/octet-stream',
                           attachment_filename=object_url)
def _upload_to_uri(img: Image.Image, uri: str):
    """
    Encodes the image as JPEG and stores it at the given URI.

    :param img: a PIL.Image.Image object
    :param uri: the storage URI to write to (should start with gs://,
        az://, etc.)
    """
    jpeg_buffer = io.BytesIO()
    img.save(jpeg_buffer, format='jpeg')
    # Rewind so the storage client reads the encoded bytes from the start.
    jpeg_buffer.seek(0)
    storage.get_storage_client(uri).put(uri, jpeg_buffer)
def test_create_images_2d_npy_3_channels():
    """
    _create_images on a 2D .npy sample with three channels writes a single
    '<prefix>.jpg' image.
    """
    image_type = '2D'
    image_url = 'temp://2d-array'
    data_url = 'temp://2d-array.npy'

    # Serialize a 5x5 three-channel array and store it as the data.
    pixels = numpy.zeros((5, 5, 3))
    buffer = io.BytesIO()
    numpy.save(buffer, pixels)
    buffer.seek(0)

    client = storage.get_storage_client(data_url)
    client.put(data_url, buffer)

    image._create_images(image_type, image_url, data_url)

    expected_image = image_url + '.jpg'
    assert client.exists(expected_image)
def test_get_images_jpg():
    """
    get_images returns exactly one URL for a sample whose data is itself a
    JPEG, matching the extension case-insensitively (.jpg and .JPEG).
    """
    payload = b'some bytes that should represent a JPG file'
    data_url1 = 'temp://blueno/some-image.jpg'
    data_url2 = 'temp://blueno/some-image.JPEG'

    client = storage.get_storage_client(data_url1)
    # Use a fresh stream per upload: reusing one stream without rewinding
    # would hand the second put() an already-consumed stream.
    client.put(data_url1, io.BytesIO(payload))
    client.put(data_url2, io.BytesIO(payload))

    urls1 = image.get_images({'data': {'url': data_url1}}, 1, 0)
    urls2 = image.get_images({'data': {'url': data_url2}}, 1, 0)

    assert len(urls1) == 1
    assert len(urls2) == 1
def test_create_images_3d_npy():
    """
    _create_images on a 3D .npy sample writes one image per index along
    the first axis: a (5, 5, 10) array yields images 0 through 4 only.
    """
    image_type = '3D'
    image_url = 'temp://3d-array'
    data_url = 'temp://3d-array.npy'

    volume = numpy.zeros((5, 5, 10))
    buffer = io.BytesIO()
    numpy.save(buffer, volume)
    buffer.seek(0)

    client = storage.get_storage_client(data_url)
    client.put(data_url, buffer)

    image._create_images(image_type, image_url, data_url)

    # The first and last slice exist, and nothing past the last slice.
    for index in (0, 4):
        assert client.exists(f'{image_url}-{index}.jpg')
    assert not client.exists(f'{image_url}-{5}.jpg')
def upload_object():
    """
    Stores the uploaded 'file' form field at the 'url' query parameter.

    :return: an empty JSON object on success, or a (JSON message, 400)
        pair when the url or file is missing or no storage client accepts
        the URL
    """
    # 1. Validate the request
    object_url = flask.request.args.get('url', None)
    if object_url is None:
        return flask.jsonify({'message': "No url query param found"}), 400

    uploads = flask.request.files
    if 'file' not in uploads:
        return flask.jsonify({'message': "No file found"}), 400
    uploaded = uploads['file']

    # 2. Save the data
    try:
        client = storage.get_storage_client(object_url)
    except ValueError as error:
        return flask.jsonify({'message': str(error)}), 400
    else:
        client.put(object_url, uploaded.stream)

    return flask.jsonify({})
def test_create_images_3d_npz():
    """
    _create_images raises ValueError for a 3D sample stored as .npz,
    because npz files are no longer supported.
    """
    image_type = '3D'
    image_url = 'temp://3d-array'
    data_url = 'temp://3d-array.npz'

    arrays = (
        numpy.zeros((5, 5, 9)),
        numpy.ones((111, 4, 5)),
        numpy.ones((6, 2, 17)),
    )
    buffer = io.BytesIO()
    numpy.savez(buffer, *arrays)
    buffer.seek(0)

    storage.get_storage_client(data_url).put(data_url, buffer)

    with pytest.raises(ValueError):
        image._create_images(image_type, image_url, data_url)
def test_create_images_2d_npz():
    """
    _create_images raises IOError for a 2D sample stored as .npz; this
    format used to be supported and should now throw an exception.
    """
    image_type = '2D'
    image_url = 'temp://2d-array.jpg'
    data_url = 'temp://2d-array.npz'

    arrays = (
        numpy.zeros((5, 5)),
        numpy.ones((5, 4, 1)),
        numpy.ones((6, 2, 3)),
    )
    buffer = io.BytesIO()
    numpy.savez(buffer, *arrays)
    buffer.seek(0)

    storage.get_storage_client(data_url).put(data_url, buffer)

    with pytest.raises(IOError):
        image._create_images(image_type, image_url, data_url)
def _create_images(image_type: str, image_url_prefix: str,
                   data_url: str) -> int:
    """
    Creates the JPEG image(s) for a sample from its stored data.

    A '2D' sample produces '<prefix>.jpg'. A '3D' sample produces
    '<prefix>-<i>.jpg' for every index i along the array's first axis.

    :param image_type: either '2D' or '3D'
    :param image_url_prefix: the internal url prefix of the image. This
        should not end with '/' or a file extension.
    :param data_url: the internal URL of the data
    :return: the number of images created
    :raises NotImplementedError: for '2D' .tfrecord data and for the 'CT'
        image type
    :raises ValueError: for '3D' data that is not a .npy file and for any
        other image type
    """
    # _create_images and get_images should have mirrored structures
    client = storage.get_storage_client(data_url)
    payload = client.get(data_url)

    if image_type == '2D':
        target = f'{image_url_prefix}.jpg'
        lowered = data_url.lower()
        if lowered.endswith('.npy'):
            _create_image_from_npy(numpy.load(payload), target)
            return 1
        if lowered.endswith('.tfrecord'):
            raise NotImplementedError()
        # Anything else is handed to PIL; this will fail if the format is
        # unsupported
        _upload_to_uri(Image.open(payload), target)
        return 1

    if image_type == '3D':
        if not data_url.lower().endswith('.npy'):
            raise ValueError(f'Cannot create images for'
                             f' image_type={image_type} data_url={data_url}')
        # Create an image per each slice along the first axis.
        # TODO: Document the expected axis order of 3D data somewhere
        volume = numpy.load(payload)
        slice_count = volume.shape[0]
        for index in range(slice_count):
            rendered = _create_rgb_image(volume[index])
            _upload_to_uri(rendered, f'{image_url_prefix}-{index}.jpg')
        return volume.shape[0]

    if image_type == 'CT':
        raise NotImplementedError(f'Image type {image_type} not supported yet')
    raise ValueError(f'Image type {image_type} not supported')
def delete_sample(dataset_name, name):
    """
    Deletes the sample `name` of dataset `dataset_name` from the database.

    When the JSON payload carries a truthy 'purge' field, the object at the
    sample's info['data']['url'] is deleted from storage before the row is
    removed.

    :param dataset_name: the name of the dataset the sample belongs to
    :param name: the name of the sample
    :return: a JSON object holding the 'id' of the deleted sample
    :raises db.NotFoundException: if no matching sample row is found
    """
    body = flask.request.json
    conn = db.get_conn()
    query_args = (name, dataset_name)

    purge_requested = body and body.get('purge', None)
    if purge_requested:
        with conn.cursor() as cur:
            cur.execute(
                " SELECT s.info FROM samples AS s"
                " JOIN datasets AS d ON s.dataset_id = d.id"
                " WHERE s.name = %s AND d.name = %s; ",
                query_args)
            found = cur.fetchone()
            if found is None:
                raise db.NotFoundException(f"Could not find sample '{name}'")
            sample_info = found[0]
            data_info: Dict = sample_info['data']
            data_url: str = data_info['url']
            storage.get_storage_client(data_url).delete(data_url)

    with conn.cursor() as cur:
        cur.execute(
            " DELETE FROM samples WHERE name = %s"
            " AND dataset_id = ( SELECT id FROM datasets WHERE name = %s )"
            " RETURNING id; ",
            query_args)
        conn.commit()
        # RETURNING yields a row only when a sample was actually deleted.
        deleted = cur.fetchone()
        if deleted is None:
            raise db.NotFoundException(f"Sample '{name}' was not found")
        deleted_id = deleted[0]

    return flask.jsonify({'id': deleted_id})
def test_get_images_azure2():
    """
    get_images on a 3D sample stored under az:// returns 10 URLs when
    asking for 10 images from offset 2 of 12 slices.
    """
    image_type = '3D'
    image_url = 'az://blueno/3d-array'
    data_url = 'az://blueno/3d-array.npy'

    # Serialize a 12-slice array and store it as the sample's data.
    volume = numpy.zeros((12, 5, 3))
    buffer = io.BytesIO()
    numpy.save(buffer, volume)
    buffer.seek(0)
    storage.get_storage_client(data_url).put(data_url, buffer)

    image._create_images(image_type, image_url, data_url)

    info = {
        'image': {
            'url': image_url,
            'type': image_type,
            'count': volume.shape[0],
        }
    }
    urls = image.get_images(info, 10, 2)
    assert len(urls) == 10
def register_sample(dataset_name, name):
    """
    Registers the sample `name` under the dataset `dataset_name`.

    The JSON payload must carry an 'info' object with a 'data' entry that
    has a 'url'. Unless the payload sets 'validate' to a falsy value, the
    data must exist at that URL. When 'info' has an 'image' entry, images
    are created (or queued) after the sample row is inserted.

    :param dataset_name: the name of the dataset to register the sample in
    :param name: the name of the new sample
    :return: a JSON object holding the new sample's 'id', or a
        (JSON message, 400) pair when the payload fails validation
    :raises db.NotFoundException: if the dataset does not exist
    :raises db.ConflictException: if inserting the sample raises
        psycopg2.IntegrityError
    """
    # 1. Validate the payload
    body = flask.request.get_json()
    if body is None:
        return flask.jsonify(
            {'message': 'No JSON payload found. Cannot register the sample'}
        ), 400

    info = body.get('info', None)
    if info is None:
        return flask.jsonify(
            {'message': 'No info object found. Cannot register the sample'}
        ), 400
    if 'data' not in info:
        message = ('No data field found in info.'
                   ' Cannot register the sample.')
        return flask.jsonify({'message': message}), 400
    if 'url' not in info['data']:
        message = ('No url field found in info["data"].'
                   ' Cannot register the sample.')
        return flask.jsonify({'message': message}), 400

    # 2. Check whether the sample exists at the given data_url
    data_url: str = info['data']['url']
    if body.get('validate', True):
        try:
            client = storage.get_storage_client(data_url)
        except ValueError:
            message = (f'Cannot find valid storage client for {data_url}.'
                       ' Validation failed.')
            return flask.jsonify({'message': message}), 400
        if not client.exists(data_url):
            message = (f'No data was found at info["data"]={data_url}.'
                       ' Validation failed.')
            return flask.jsonify({'message': message}), 400

    # 3. Update info['image']
    if 'image' in info:
        image_info = info['image']
        if image_info.get('type', None) is None:
            message = ('No type field found in info["image"].'
                       ' Cannot make an image of the sample.')
            return flask.jsonify({'message': message}), 400
        # TODO: Allow users to define their own image url root through the
        # admin page
        if env.FILESYSTEM_STORE_ROOT is None:
            message = 'Filesystem store not enabled, not creating images.'
            return flask.jsonify({'message': message}), 400
        # We create the images once we know the sample ID
        image_info['url'] = f'file://images/{dataset_name}/{name}'
        image_info['status'] = 'CREATING'
    else:
        # Image files stored on Azure or GCS are served as-is, so they
        # are marked as already created.
        on_cloud = data_url.startswith(
            (storage.clients.AZURE_PREFIX, storage.clients.GCS_PREFIX))
        if on_cloud and data_url.lower().endswith(('.png', '.jpg', '.jpeg')):
            info['image'] = {
                'url': data_url,
                'type': 'FROM_DATA',
                'status': 'CREATED',
            }

    # 4. Register the sample in the database
    conn = db.get_conn()
    created_at = datetime.datetime.now(datetime.timezone.utc)
    info_json = json.dumps(info)
    with conn.cursor() as cur:
        cur.execute(" SELECT id FROM datasets WHERE name = %s; ",
                    (dataset_name, ))
        dataset_row = cur.fetchone()
        if dataset_row is None:
            raise db.NotFoundException(f"Dataset '{dataset_name}' not found")
        dataset_id = dataset_row[0]

        try:
            cur.execute(
                " INSERT INTO samples ("
                " name, info, dataset_id, created_at, last_updated)"
                " VALUES (%s, %s, %s, %s, %s) RETURNING id; ",
                (name, info_json, dataset_id, created_at, created_at))
        except psycopg2.IntegrityError:
            conn.rollback()
            raise db.ConflictException(f"Sample '{name}' already exists.")
        # Only reached when the insert succeeded (the handler re-raises).
        conn.commit()
        sample_id = cur.fetchone()[0]

    # 5. Create the image now or queue a job to create the image
    needs_images = ('image' in info
                    and info['image']['status'] == 'CREATING'
                    and data_url.startswith(storage.clients.FILESYSTEM_PREFIX))
    if needs_images:
        # 2D images are created inline; every other type is queued.
        make_images = (image.create_images
                       if info['image']['type'] == '2D'
                       else image.enqueue_create_images)
        make_images(sample_id)

    return flask.jsonify({'id': sample_id})