Example #1
def get_image(album_name, image_name, username):
    gallery_db = connect_to_db()
    albums = gallery_db.albums

    requested_album = albums.find_one({"name": album_name})
    if not requested_album:
        return redirect(url_for('static', filename='image_not_found.gif'))

    if not (username in requested_album["write"] or username in requested_album["read"]):
        return redirect(url_for('static', filename='image_not_found.gif'))

    if image_name not in requested_album["images"]:
        return redirect(url_for('static', filename='image_not_found.gif'))

    # Create the timer before the try block so the except handler can safely
    # call stop() even if the download fails early.
    stats_download_timer = stats_client.timer("download timer")
    try:
        # start timing the download
        stats_download_timer.start()
        blob_service = BlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)
        data = blob_service.get_blob_to_bytes(CONTAINER_NAME, image_name)

        response = make_response(data)
        response.headers["Content-Disposition"] = "filename=%s.jpg" % image_name
        response.headers["Content-Type"] = "image/jpeg"
        stats_download_timer.stop()
        stats_client.incr("images downloaded", 1)
        return response
    except Exception:
        # TODO: different image in this case?
        stats_download_timer.stop()
        return redirect(url_for('static', filename='image_not_found.gif'))
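A minimal sketch of how a handler like this might be wired into a Flask app. The route layout and the way the username is obtained are assumptions for illustration, not part of the original example.

# Hypothetical wiring for get_image(); route and auth handling are assumed.
from flask import Flask, request

app = Flask(__name__)

@app.route("/albums/<album_name>/images/<image_name>")
def serve_image(album_name, image_name):
    # A real app would take the username from its auth layer; a query
    # parameter keeps the sketch self-contained.
    username = request.args.get("user", "")
    return get_image(album_name, image_name, username)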
Example #2
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, container_name):
        BaseTransfer.__init__(self)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = container_name
        self.conn = BlobService(account_name=self.account_name, account_key=self.account_key)
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized")

    def get_metadata_for_key(self, key):
        key = fix_path(key)
        return self.list_path(key)[0]['metadata']

    def list_path(self, path):
        return_list = []
        path = fix_path(path)
        self.log.info("Asking for listing of: %r", path)
        for r in self.conn.list_blobs(self.container_name, prefix=path, delimiter="/",
                                      include="metadata"):
            entry = {"name": r.name, "size": r.properties.content_length,
                     "last_modified": dateutil.parser.parse(r.properties.last_modified),
                     "metadata": r.metadata}
            return_list.append(entry)
        return return_list

    def delete_key(self, key_name):
        key_name = fix_path(key_name)
        self.log.debug("Deleting key: %r", key_name)
        return self.conn.delete_blob(self.container_name, key_name)

    def get_contents_to_file(self, obj_key, filepath_to_store_to):
        obj_key = fix_path(obj_key)
        self.log.debug("Starting to fetch the contents of: %r to: %r", obj_key, filepath_to_store_to)
        return self.conn.get_blob_to_path(self.container_name, obj_key, filepath_to_store_to)

    def get_contents_to_string(self, obj_key):
        obj_key = fix_path(obj_key)
        self.log.debug("Starting to fetch the contents of: %r", obj_key)
        return self.conn.get_blob_to_bytes(self.container_name, obj_key), self.get_metadata_for_key(obj_key)

    def store_file_from_memory(self, key, memstring, metadata=None):
        # Azure requires all metadata keys and values to be strings at send
        # time; tolerate metadata=None as well.
        metadata_to_send = dict((str(k), str(v)) for k, v in (metadata or {}).items())
        self.conn.put_block_blob_from_bytes(self.container_name, key, memstring,
                                            x_ms_meta_name_values=metadata_to_send)

    def store_file_from_disk(self, key, filepath, metadata=None):
        # Azure requires all metadata keys and values to be strings at send
        # time; tolerate metadata=None as well.
        metadata_to_send = dict((str(k), str(v)) for k, v in (metadata or {}).items())
        self.conn.put_block_blob_from_path(self.container_name, key, filepath,
                                           x_ms_meta_name_values=metadata_to_send)

    def get_or_create_container(self, container_name):
        start_time = time.time()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs", container_name, time.time() - start_time)
        return container_name
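A short usage sketch for the class above; the account credentials, container, and key names below are placeholders.

# Hypothetical usage of AzureTransfer; all names below are placeholders.
transfer = AzureTransfer(account_name="myaccount",
                         account_key="<base64-account-key>",
                         container_name="backups")
transfer.store_file_from_disk("db/basebackup", "/tmp/basebackup.tar",
                              metadata={"compression": "lzma"})
data, metadata = transfer.get_contents_to_string("db/basebackup")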
Example #3
def download_chunck(path, storagename, container, key):
    blob_service = BlobService(account_name=storagename, account_key=key)
    loop = 0

    while True:
        try:
            return blob_service.get_blob_to_bytes(container, path)
        except (azure.http.HTTPError, TimeoutError):
            # Retry transient failures; re-raise after the third attempt.
            loop += 1
            if loop >= 3:
                raise
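A hypothetical call site for the helper above; the storage account, container, and blob path are placeholders. A short time.sleep() between attempts would be a natural extension to avoid hammering the service on transient errors.

# Hypothetical call; every name below is a placeholder.
chunk = download_chunck("backups/chunk-0001", "mystorageaccount",
                        "mycontainer", "<base64-account-key>")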
Example #4
class PerformanceTests(unittest.TestCase):
    def setUp(self):
        self.workspace = Workspace(settings.workspace.id,
                                   settings.workspace.token,
                                   settings.workspace.endpoint)
        self.blob = BlobService(settings.storage.account_name,
                                settings.storage.account_key)

    def _write_blob_contents(self, filename, data):
        if settings.diagnostics.write_blob_contents:
            with open('original-blob-' + filename, 'wb') as data_file:
                data_file.write(data)

    def _write_serialized_frame(self, filename, data):
        if settings.diagnostics.write_serialized_frame:
            with open('serialized-frame-' + filename, 'wb') as data_file:
                data_file.write(data)

    def test_serialize_40mb_dataframe(self):
        # Arrange
        blob_name = settings.storage.medium_size_blob
        original_data = self.blob.get_blob_to_bytes(settings.storage.container,
                                                    blob_name)
        original_dataframe = pd.read_csv(BytesIO(original_data),
                                         header=0,
                                         sep=",",
                                         encoding='utf-8-sig')

        self._write_blob_contents(blob_name, original_data)

        # Act
        start_time = datetime.now()
        writer = BytesIO()
        serialize_dataframe(writer, DataTypeIds.GenericCSV, original_dataframe)
        elapsed_time = datetime.now() - start_time
        result_data = writer.getvalue()

        self._write_serialized_frame(blob_name, result_data)

        # Assert
        result_dataframe = pd.read_csv(BytesIO(result_data),
                                       header=0,
                                       sep=",",
                                       encoding='utf-8-sig')
        assert_frame_equal(original_dataframe, result_dataframe)
        self.assertLess(elapsed_time.total_seconds(), 10)
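One design note on the timing in test_serialize_40mb_dataframe: datetime.now() works, but time.perf_counter() is the usual choice for measuring elapsed time. A sketch of the substitution for the Act block, reusing the names above:

# Alternative timing with time.perf_counter(); drop-in for the Act block.
import time

start = time.perf_counter()
writer = BytesIO()
serialize_dataframe(writer, DataTypeIds.GenericCSV, original_dataframe)
elapsed_seconds = time.perf_counter() - start  # float seconds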
Example #5

class PerformanceTests(unittest.TestCase):
    def setUp(self):
        self.workspace = Workspace(
            settings.workspace.id,
            settings.workspace.token,
            settings.workspace.endpoint
        )
        self.blob = BlobService(
            settings.storage.account_name,
            settings.storage.account_key
        )

    def _write_blob_contents(self, filename, data):
        if settings.diagnostics.write_blob_contents:
            with open('original-blob-' + filename, 'wb') as data_file:
                data_file.write(data)

    def _write_serialized_frame(self, filename, data):
        if settings.diagnostics.write_serialized_frame:
            with open('serialized-frame-' + filename, 'wb') as data_file:
                data_file.write(data)

    def test_serialize_40mb_dataframe(self):
        # Arrange
        blob_name = settings.storage.medium_size_blob
        original_data = self.blob.get_blob_to_bytes(settings.storage.container, blob_name)
        original_dataframe = pd.read_csv(BytesIO(original_data), header=0, sep=",", encoding='utf-8-sig')

        self._write_blob_contents(blob_name, original_data)

        # Act
        start_time = datetime.now()
        writer = BytesIO()
        serialize_dataframe(writer, DataTypeIds.GenericCSV, original_dataframe)
        elapsed_time = datetime.now() - start_time
        result_data = writer.getvalue()

        self._write_serialized_frame(blob_name, result_data)

        # Assert
        result_dataframe = pd.read_csv(BytesIO(result_data), header=0, sep=",", encoding='utf-8-sig')
        assert_frame_equal(original_dataframe, result_dataframe)
        self.assertLess(elapsed_time.total_seconds(), 10)
Example #6
def rest_get_image(album_name, image_name, username):
    gallery_db = connect_to_db()
    albums = gallery_db.albums

    requested_album = albums.find_one({"name": album_name})
    if not requested_album:
        return jsonify({'error': "album does not exist"})

    if not (username in requested_album["write"] or username in requested_album["read"]):
        return jsonify({'error': "no permission to get images"})

    if image_name not in requested_album["images"]:
        return jsonify({'error': "no such image in album"})

    blob_service = BlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)
    data = blob_service.get_blob_to_bytes(CONTAINER_NAME, image_name)

    response = make_response(data)
    response.headers["Content-Disposition"] = "attachment; filename=%s" % image_name
    return response
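A hedged client-side sketch for an endpoint like this one: on failure the handler returns JSON with an "error" key, on success the raw image bytes. The URL layout and query parameter below are assumptions.

# Hypothetical client; the URL layout and query parameter are assumed.
import requests

resp = requests.get("http://localhost:5000/rest/albums/holiday/images/beach.jpg",
                    params={"user": "alice"})
if resp.headers.get("Content-Type", "").startswith("application/json"):
    print(resp.json()["error"])           # one of the error responses above
else:
    with open("beach.jpg", "wb") as f:    # raw image bytes
        f.write(resp.content)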
Example #7
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, container_name, prefix=None):
        # NOTE: Azure wants all paths to start with a slash
        prefix = "/{}".format(prefix.lstrip("/") if prefix else "")
        super().__init__(prefix=prefix)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = container_name
        self.conn = BlobService(account_name=self.account_name, account_key=self.account_key)
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized")
        # XXX: AzureTransfer isn't actively tested and its error handling is probably lacking
        self.log.warning("AzureTransfer is experimental and has not been thoroughly tested")

    def get_metadata_for_key(self, key):
        key = self.format_key_for_backend(key)
        return self._list_blobs(key)[0]["metadata"]

    def _metadata_for_key(self, key):
        return self._list_blobs(key)[0]["metadata"]

    def list_path(self, key):
        path = self.format_key_for_backend(key, trailing_slash=True)
        return self._list_blobs(path)

    def _list_blobs(self, path):
        self.log.debug("Listing path %r", path)
        items = self.conn.list_blobs(self.container_name, prefix=path, delimiter="/", include="metadata")
        result = []
        for item in items:
            result.append({
                "last_modified": dateutil.parser.parse(item.properties.last_modified),
                "metadata": item.metadata,
                "name": self.format_key_from_backend(item.name),
                "size": item.properties.content_length,
            })
        return result

    def delete_key(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Deleting key: %r", key)
        return self.conn.delete_blob(self.container_name, key)

    def get_contents_to_file(self, key, filepath_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r to: %r", key, filepath_to_store_to)
        return self.conn.get_blob_to_path(self.container_name, key, filepath_to_store_to)

    def get_contents_to_fileobj(self, key, fileobj_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return self.conn.get_blob_to_file(self.container_name, key, fileobj_to_store_to)

    def get_contents_to_string(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return self.conn.get_blob_to_bytes(self.container_name, key), self._metadata_for_key(key)

    def store_file_from_memory(self, key, memstring, metadata=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings;
        # tolerate metadata=None as well.
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_bytes(self.container_name, key, memstring,
                                            x_ms_meta_name_values=metadata_to_send)

    def store_file_from_disk(self, key, filepath, metadata=None, multipart=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings;
        # tolerate metadata=None as well.
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_path(self.container_name, key, filepath,
                                           x_ms_meta_name_values=metadata_to_send)

    def get_or_create_container(self, container_name):
        start_time = time.time()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs", container_name, time.time() - start_time)
        return container_name
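The prefix normalization in __init__ is easy to sanity-check in isolation; this standalone sketch exercises just that expression, independent of BaseTransfer:

# Standalone check of the prefix expression used in __init__ above.
for raw in (None, "", "pghoard", "/pghoard", "//a/b"):
    prefix = "/{}".format(raw.lstrip("/") if raw else "")
    print(repr(raw), "->", repr(prefix))
# None and "" map to "/", "pghoard" and "/pghoard" both map to "/pghoard",
# and "//a/b" collapses to "/a/b".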
Example #8
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, container_name, prefix=None):
        # NOTE: Azure wants all paths to start with a slash
        prefix = "/{}".format(prefix.lstrip("/") if prefix else "")
        super().__init__(prefix=prefix)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = container_name
        self.conn = BlobService(account_name=self.account_name,
                                account_key=self.account_key)
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized")
        # XXX: AzureTransfer isn't actively tested and its error handling is probably lacking
        self.log.warning(
            "AzureTransfer is experimental and has not been thoroughly tested")

    def get_metadata_for_key(self, key):
        key = self.format_key_for_backend(key)
        return self._list_blobs(key)[0]["metadata"]

    def _metadata_for_key(self, key):
        return self._list_blobs(key)[0]["metadata"]

    def list_path(self, key):
        path = self.format_key_for_backend(key, trailing_slash=True)
        return self._list_blobs(path)

    def _list_blobs(self, path):
        self.log.debug("Listing path %r", path)
        items = self.conn.list_blobs(self.container_name,
                                     prefix=path,
                                     delimiter="/",
                                     include="metadata")
        result = []
        for item in items:
            result.append({
                "last_modified": dateutil.parser.parse(item.properties.last_modified),
                "metadata": item.metadata,
                "name": self.format_key_from_backend(item.name),
                "size": item.properties.content_length,
            })
        return result

    def delete_key(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Deleting key: %r", key)
        return self.conn.delete_blob(self.container_name, key)

    def get_contents_to_file(self, key, filepath_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r to: %r", key,
                       filepath_to_store_to)
        return self.conn.get_blob_to_path(self.container_name, key,
                                          filepath_to_store_to)

    def get_contents_to_fileobj(self, key, fileobj_to_store_to):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return self.conn.get_blob_to_file(self.container_name, key,
                                          fileobj_to_store_to)

    def get_contents_to_string(self, key):
        key = self.format_key_for_backend(key)
        self.log.debug("Starting to fetch the contents of: %r", key)
        return (self.conn.get_blob_to_bytes(self.container_name, key),
                self._metadata_for_key(key))

    def store_file_from_memory(self, key, memstring, metadata=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings;
        # tolerate metadata=None as well.
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_bytes(
            self.container_name,
            key,
            memstring,
            x_ms_meta_name_values=metadata_to_send)

    def store_file_from_disk(self,
                             key,
                             filepath,
                             metadata=None,
                             multipart=None):
        key = self.format_key_for_backend(key)
        # Azure requires all metadata keys and values to be strings;
        # tolerate metadata=None as well.
        metadata_to_send = {str(k): str(v) for k, v in (metadata or {}).items()}
        self.conn.put_block_blob_from_path(
            self.container_name,
            key,
            filepath,
            x_ms_meta_name_values=metadata_to_send)

    def get_or_create_container(self, container_name):
        start_time = time.time()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs",
                       container_name,
                       time.time() - start_time)
        return container_name
Example #9

class RoundTripTests(unittest.TestCase):
    def setUp(self):
        self.workspace = Workspace(settings.workspace.id, settings.workspace.token, settings.workspace.endpoint)
        self.blob = BlobService(settings.storage.account_name, settings.storage.account_key)

    def _write_blob_contents(self, filename, data):
        if settings.diagnostics.write_blob_contents:
            with open("original-blob-" + filename, "wb") as data_file:
                data_file.write(data)

    def _write_serialized_frame(self, filename, data):
        if settings.diagnostics.write_serialized_frame:
            with open("serialized-frame-" + filename, "wb") as data_file:
                data_file.write(data)

    def test_download_blob_then_upload_as_dataframe_then_read_dataset(self):
        def datatypeid_from_header_and_format(header, format):
            if format == "csv":
                if header == "wh":
                    return DataTypeIds.GenericCSV
                else:
                    return DataTypeIds.GenericCSVNoHeader
            elif format == "tsv":
                if header == "wh":
                    return DataTypeIds.GenericTSV
                else:
                    return DataTypeIds.GenericTSVNoHeader
            elif format == "txt":
                return DataTypeIds.PlainText
            else:
                self.assertTrue(False, "Unexpected format")

        def split_blob_name(blob_name):
            # blob naming convention:
            # name_<header>.<format>
            # <header>: WH: with header
            #           NH: no header
            # <format>: CSV: comma separated
            #           TSV: tab separated
            #           TXT: newline separated
            name, format = blob_name.lower().split(".")
            if format != "txt":
                name, header = name.split("_")
            else:
                header = "nh"

            return name, format, header

        for blob_name in settings.storage.blobs:
            print(blob_name)

            name, format, header = split_blob_name(blob_name)

            # Read the data from blob storage
            original_data = self.blob.get_blob_to_bytes(settings.storage.container, blob_name)
            self._write_blob_contents(blob_name, original_data)

            # Parse the data to a dataframe using Pandas
            original_dataframe = pd.read_csv(
                BytesIO(original_data),
                header=0 if header == "wh" else None,
                sep="," if format == "csv" else "\t" if format == "tsv" else "\n",
                encoding="utf-8-sig",
            )

            # Upload the dataframe as a new dataset
            dataset_name = "unittest" + name + id_generator()
            description = "safe to be deleted - " + dataset_name
            data_type_id = datatypeid_from_header_and_format(header, format)
            self.workspace.datasets.add_from_dataframe(original_dataframe, data_type_id, dataset_name, description)

            # Get the new dataset
            dataset = self.workspace.datasets[dataset_name]
            self.assertIsNotNone(dataset)

            # Read the dataset as a dataframe
            result_data = dataset.read_as_binary()
            self._write_serialized_frame(blob_name, result_data)
            result_dataframe = dataset.to_dataframe()

            # Verify that the dataframes are equal
            assert_frame_equal(original_dataframe, result_dataframe)

    def test_azureml_example_datasets(self):
        max_size = 10 * 1024 * 1024
        skip = ["Restaurant feature data", "IMDB Movie Titles", "Book Reviews from Amazon"]

        for dataset in self.workspace.example_datasets:
            if not hasattr(dataset, "to_dataframe"):
                print("skipped (unsupported format): {0}".format(dataset.name))
                continue

            if dataset.size > max_size:
                print("skipped (max size): {0}".format(dataset.name))
                continue

            if dataset.name in skip:
                print("skipped: {0}".format(dataset.name))
                continue

            print("downloading: " + dataset.name)
            frame = dataset.to_dataframe()

            print("uploading: " + dataset.name)
            dataset_name = "unittest" + dataset.name + id_generator()
            description = "safe to be deleted - " + dataset_name
            self.workspace.datasets.add_from_dataframe(frame, dataset.data_type_id, dataset_name, description)
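For reference, what split_blob_name yields for the naming convention documented above. The blob names are made up, and the asserts assume the helper were lifted out of the test to module level (it is nested inside the test method as written):

# Illustrative behavior of split_blob_name (hypothetical blob names).
assert split_blob_name("iris_wh.csv") == ("iris", "csv", "wh")
assert split_blob_name("iris_nh.tsv") == ("iris", "tsv", "nh")
assert split_blob_name("notes.txt") == ("notes", "txt", "nh")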
Example #10
class RoundTripTests(unittest.TestCase):
    def setUp(self):
        self.workspace = Workspace(
            settings.workspace.id,
            settings.workspace.token,
            settings.workspace.endpoint
        )
        self.blob = BlobService(
            settings.storage.account_name,
            settings.storage.account_key
        )

    def _write_blob_contents(self, filename, data):
        if settings.diagnostics.write_blob_contents:
            with open('original-blob-' + filename, 'wb') as data_file:
                data_file.write(data)

    def _write_serialized_frame(self, filename, data):
        if settings.diagnostics.write_serialized_frame:
            with open('serialized-frame-' + filename, 'wb') as data_file:
                data_file.write(data)

    def test_download_blob_then_upload_as_dataframe_then_read_dataset(self):
        def datatypeid_from_header_and_format(header, format):
            if format == 'csv':
                if header == 'wh':
                    return DataTypeIds.GenericCSV
                else:
                    return DataTypeIds.GenericCSVNoHeader
            elif format == 'tsv':
                if header == 'wh':
                    return DataTypeIds.GenericTSV
                else:
                    return DataTypeIds.GenericTSVNoHeader
            elif format == 'txt':
                return DataTypeIds.PlainText
            else:
                self.fail('Unexpected format')

        def split_blob_name(blob_name):
            # blob naming convention:
            # name_<header>.<format>
            # <header>: WH: with header
            #           NH: no header
            # <format>: CSV: comma separated
            #           TSV: tab separated
            #           TXT: newline separated
            name, format = blob_name.lower().split('.')
            if format != 'txt':
                name, header = name.split('_')
            else:
                header = 'nh'

            return name, format, header

        for blob_name in settings.storage.blobs:
            print(blob_name)

            name, format, header = split_blob_name(blob_name)

            # Read the data from blob storage
            original_data = self.blob.get_blob_to_bytes(settings.storage.container, blob_name)
            self._write_blob_contents(blob_name, original_data)

            # Parse the data to a dataframe using Pandas
            original_dataframe = pd.read_csv(
                BytesIO(original_data),
                header=0 if header == 'wh' else None,
                sep=',' if format == 'csv' else '\t' if format == 'tsv' else '\n',
                encoding='utf-8-sig'
            )

            # Upload the dataframe as a new dataset
            dataset_name = 'unittest' + name + id_generator()
            description = 'safe to be deleted - ' + dataset_name
            data_type_id = datatypeid_from_header_and_format(header, format)
            self.workspace.datasets.add_from_dataframe(
                original_dataframe,
                data_type_id,
                dataset_name,
                description,
            )

            # Get the new dataset
            dataset = self.workspace.datasets[dataset_name]
            self.assertIsNotNone(dataset)

            # Read the dataset as a dataframe
            result_data = dataset.read_as_binary()
            self._write_serialized_frame(blob_name, result_data)
            result_dataframe = dataset.to_dataframe()

            # Verify that the dataframes are equal
            assert_frame_equal(original_dataframe, result_dataframe)

    def test_azureml_example_datasets(self):
        max_size = 10 * 1024 * 1024
        skip = [
            'Restaurant feature data',
            'IMDB Movie Titles',
            'Book Reviews from Amazon',
        ]

        for dataset in self.workspace.example_datasets:
            if not hasattr(dataset, 'to_dataframe'):
                print('skipped (unsupported format): {0}'.format(dataset.name))
                continue

            if dataset.size > max_size:
                print('skipped (max size): {0}'.format(dataset.name))
                continue

            if dataset.name in skip:
                print('skipped: {0}'.format(dataset.name))
                continue

            print('downloading: ' + dataset.name)
            frame = dataset.to_dataframe()

            print('uploading: ' + dataset.name)
            dataset_name = 'unittest' + dataset.name + id_generator()
            description = 'safe to be deleted - ' + dataset_name
            self.workspace.datasets.add_from_dataframe(frame, dataset.data_type_id, dataset_name, description)
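A minimal runner for the test classes in these examples, assuming they are importable from the current module; only standard unittest loader APIs are used.

# Hypothetical runner; assumes RoundTripTests is defined/importable here.
import unittest

suite = unittest.TestLoader().loadTestsFromTestCase(RoundTripTests)
unittest.TextTestRunner(verbosity=2).run(suite)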